1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
16 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
17 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
18 #include "llvm/Analysis/VectorUtils.h"
19 #include "llvm/IR/IntrinsicInst.h"
20 
21 using namespace llvm;
22 
23 #define LV_NAME "loop-vectorize"
24 #define DEBUG_TYPE LV_NAME
25 
26 extern cl::opt<bool> EnableVPlanPredication;
27 
28 static cl::opt<bool>
29     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
30                        cl::desc("Enable if-conversion during vectorization."));
31 
32 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
33     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
34     cl::desc("The maximum allowed number of runtime memory checks with a "
35              "vectorize(enable) pragma."));
36 
37 static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
38     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
39     cl::desc("The maximum number of SCEV checks allowed."));
40 
41 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
42     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
43     cl::desc("The maximum number of SCEV checks allowed with a "
44              "vectorize(enable) pragma"));
45 
46 /// Maximum vectorization interleave count.
47 static const unsigned MaxInterleaveFactor = 16;
48 
49 namespace llvm {
50 
51 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
52   switch (Kind) {
53   case HK_WIDTH:
54     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
55   case HK_UNROLL:
56     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
57   case HK_FORCE:
58     return (Val <= 1);
59   case HK_ISVECTORIZED:
60   case HK_PREDICATE:
61     return (Val == 0 || Val == 1);
62   }
63   return false;
64 }
65 
66 LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
67                                        bool InterleaveOnlyWhenForced,
68                                        OptimizationRemarkEmitter &ORE)
69     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
70       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
71       Force("vectorize.enable", FK_Undefined, HK_FORCE),
72       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
73       Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
74       ORE(ORE) {
75   // Populate values with existing loop metadata.
76   getHintsFromMetadata();
77 
78   // force-vector-interleave overrides DisableInterleaving.
79   if (VectorizerParams::isInterleaveForced())
80     Interleave.Value = VectorizerParams::VectorizationInterleave;
81 
82   if (IsVectorized.Value != 1)
83     // If the vectorization width and interleaving count are both 1 then
84     // consider the loop to have been already vectorized because there's
85     // nothing more that we can do.
86     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
87   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
88              << "LV: Interleaving disabled by the pass manager\n");
89 }
90 
91 void LoopVectorizeHints::setAlreadyVectorized() {
92   LLVMContext &Context = TheLoop->getHeader()->getContext();
93 
94   MDNode *IsVectorizedMD = MDNode::get(
95       Context,
96       {MDString::get(Context, "llvm.loop.isvectorized"),
97        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
98   MDNode *LoopID = TheLoop->getLoopID();
99   MDNode *NewLoopID =
100       makePostTransformationMetadata(Context, LoopID,
101                                      {Twine(Prefix(), "vectorize.").str(),
102                                       Twine(Prefix(), "interleave.").str()},
103                                      {IsVectorizedMD});
104   TheLoop->setLoopID(NewLoopID);
105 
106   // Update internal cache.
107   IsVectorized.Value = 1;
108 }
109 
110 bool LoopVectorizeHints::allowVectorization(
111     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
112   if (getForce() == LoopVectorizeHints::FK_Disabled) {
113     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
114     emitRemarkWithHints();
115     return false;
116   }
117 
118   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
119     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
120     emitRemarkWithHints();
121     return false;
122   }
123 
124   if (getIsVectorized() == 1) {
125     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
126     // FIXME: Add interleave.disable metadata. This will allow
127     // vectorize.disable to be used without disabling the pass and errors
128     // to differentiate between disabled vectorization and a width of 1.
129     ORE.emit([&]() {
130       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
131                                         "AllDisabled", L->getStartLoc(),
132                                         L->getHeader())
133              << "loop not vectorized: vectorization and interleaving are "
134                 "explicitly disabled, or the loop has already been "
135                 "vectorized";
136     });
137     return false;
138   }
139 
140   return true;
141 }
142 
143 void LoopVectorizeHints::emitRemarkWithHints() const {
144   using namespace ore;
145 
146   ORE.emit([&]() {
147     if (Force.Value == LoopVectorizeHints::FK_Disabled)
148       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
149                                       TheLoop->getStartLoc(),
150                                       TheLoop->getHeader())
151              << "loop not vectorized: vectorization is explicitly disabled";
152     else {
153       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
154                                  TheLoop->getStartLoc(), TheLoop->getHeader());
155       R << "loop not vectorized";
156       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
157         R << " (Force=" << NV("Force", true);
158         if (Width.Value != 0)
159           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
160         if (Interleave.Value != 0)
161           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
162         R << ")";
163       }
164       return R;
165     }
166   });
167 }
168 
169 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
170   if (getWidth() == 1)
171     return LV_NAME;
172   if (getForce() == LoopVectorizeHints::FK_Disabled)
173     return LV_NAME;
174   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
175     return LV_NAME;
176   return OptimizationRemarkAnalysis::AlwaysPrint;
177 }
178 
179 void LoopVectorizeHints::getHintsFromMetadata() {
180   MDNode *LoopID = TheLoop->getLoopID();
181   if (!LoopID)
182     return;
183 
184   // First operand should refer to the loop id itself.
185   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
186   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
187 
188   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
189     const MDString *S = nullptr;
190     SmallVector<Metadata *, 4> Args;
191 
192     // The expected hint is either a MDString or a MDNode with the first
193     // operand a MDString.
194     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
195       if (!MD || MD->getNumOperands() == 0)
196         continue;
197       S = dyn_cast<MDString>(MD->getOperand(0));
198       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
199         Args.push_back(MD->getOperand(i));
200     } else {
201       S = dyn_cast<MDString>(LoopID->getOperand(i));
202       assert(Args.size() == 0 && "too many arguments for MDString");
203     }
204 
205     if (!S)
206       continue;
207 
208     // Check if the hint starts with the loop metadata prefix.
209     StringRef Name = S->getString();
210     if (Args.size() == 1)
211       setHint(Name, Args[0]);
212   }
213 }
214 
215 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
216   if (!Name.startswith(Prefix()))
217     return;
218   Name = Name.substr(Prefix().size(), StringRef::npos);
219 
220   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
221   if (!C)
222     return;
223   unsigned Val = C->getZExtValue();
224 
225   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
226   for (auto H : Hints) {
227     if (Name == H->Name) {
228       if (H->validate(Val))
229         H->Value = Val;
230       else
231         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
232       break;
233     }
234   }
235 }
236 
237 bool LoopVectorizationRequirements::doesNotMeet(
238     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
239   const char *PassName = Hints.vectorizeAnalysisPassName();
240   bool Failed = false;
241   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
242     ORE.emit([&]() {
243       return OptimizationRemarkAnalysisFPCommute(
244                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
245                  UnsafeAlgebraInst->getParent())
246              << "loop not vectorized: cannot prove it is safe to reorder "
247                 "floating-point operations";
248     });
249     Failed = true;
250   }
251 
252   // Test if runtime memcheck thresholds are exceeded.
253   bool PragmaThresholdReached =
254       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
255   bool ThresholdReached =
256       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
257   if ((ThresholdReached && !Hints.allowReordering()) ||
258       PragmaThresholdReached) {
259     ORE.emit([&]() {
260       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
261                                                 L->getStartLoc(),
262                                                 L->getHeader())
263              << "loop not vectorized: cannot prove it is safe to reorder "
264                 "memory operations";
265     });
266     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
267     Failed = true;
268   }
269 
270   return Failed;
271 }
272 
273 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
274 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
275 // executing the inner loop will execute the same iterations). This check is
276 // very constrained for now but it will be relaxed in the future. \p Lp is
277 // considered uniform if it meets all the following conditions:
278 //   1) it has a canonical IV (starting from 0 and with stride 1),
279 //   2) its latch terminator is a conditional branch and,
280 //   3) its latch condition is a compare instruction whose operands are the
281 //      canonical IV and an OuterLp invariant.
282 // This check doesn't take into account the uniformity of other conditions not
283 // related to the loop latch because they don't affect the loop uniformity.
284 //
285 // NOTE: We decided to keep all these checks and its associated documentation
286 // together so that we can easily have a picture of the current supported loop
287 // nests. However, some of the current checks don't depend on \p OuterLp and
288 // would be redundantly executed for each \p Lp if we invoked this function for
289 // different candidate outer loops. This is not the case for now because we
290 // don't currently have the infrastructure to evaluate multiple candidate outer
291 // loops and \p OuterLp will be a fixed parameter while we only support explicit
292 // outer loop vectorization. It's also very likely that these checks go away
293 // before introducing the aforementioned infrastructure. However, if this is not
294 // the case, we should move the \p OuterLp independent checks to a separate
295 // function that is only executed once for each \p Lp.
296 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
297   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
298 
299   // If Lp is the outer loop, it's uniform by definition.
300   if (Lp == OuterLp)
301     return true;
302   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
303 
304   // 1.
305   PHINode *IV = Lp->getCanonicalInductionVariable();
306   if (!IV) {
307     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
308     return false;
309   }
310 
311   // 2.
312   BasicBlock *Latch = Lp->getLoopLatch();
313   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
314   if (!LatchBr || LatchBr->isUnconditional()) {
315     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
316     return false;
317   }
318 
319   // 3.
320   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
321   if (!LatchCmp) {
322     LLVM_DEBUG(
323         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
324     return false;
325   }
326 
327   Value *CondOp0 = LatchCmp->getOperand(0);
328   Value *CondOp1 = LatchCmp->getOperand(1);
329   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
330   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
331       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
332     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
333     return false;
334   }
335 
336   return true;
337 }
338 
339 // Return true if \p Lp and all its nested loops are uniform with regard to \p
340 // OuterLp.
341 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
342   if (!isUniformLoop(Lp, OuterLp))
343     return false;
344 
345   // Check if nested loops are uniform.
346   for (Loop *SubLp : *Lp)
347     if (!isUniformLoopNest(SubLp, OuterLp))
348       return false;
349 
350   return true;
351 }
352 
353 /// Check whether it is safe to if-convert this phi node.
354 ///
355 /// Phi nodes with constant expressions that can trap are not safe to if
356 /// convert.
357 static bool canIfConvertPHINodes(BasicBlock *BB) {
358   for (PHINode &Phi : BB->phis()) {
359     for (Value *V : Phi.incoming_values())
360       if (auto *C = dyn_cast<Constant>(V))
361         if (C->canTrap())
362           return false;
363   }
364   return true;
365 }
366 
367 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
368   if (Ty->isPointerTy())
369     return DL.getIntPtrType(Ty);
370 
371   // It is possible that char's or short's overflow when we ask for the loop's
372   // trip count, work around this by changing the type size.
373   if (Ty->getScalarSizeInBits() < 32)
374     return Type::getInt32Ty(Ty->getContext());
375 
376   return Ty;
377 }
378 
379 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
380   Ty0 = convertPointerToIntegerType(DL, Ty0);
381   Ty1 = convertPointerToIntegerType(DL, Ty1);
382   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
383     return Ty0;
384   return Ty1;
385 }
386 
387 /// Check that the instruction has outside loop users and is not an
388 /// identified reduction variable.
389 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
390                                SmallPtrSetImpl<Value *> &AllowedExit) {
391   // Reductions, Inductions and non-header phis are allowed to have exit users. All
392   // other instructions must not have external users.
393   if (!AllowedExit.count(Inst))
394     // Check that all of the users of the loop are inside the BB.
395     for (User *U : Inst->users()) {
396       Instruction *UI = cast<Instruction>(U);
397       // This user may be a reduction exit value.
398       if (!TheLoop->contains(UI)) {
399         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
400         return true;
401       }
402     }
403   return false;
404 }
405 
406 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
407   const ValueToValueMap &Strides =
408       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
409 
410   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
411   if (Stride == 1 || Stride == -1)
412     return Stride;
413   return 0;
414 }
415 
416 bool LoopVectorizationLegality::isUniform(Value *V) {
417   return LAI->isUniform(V);
418 }
419 
420 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
421   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
422   // Store the result and return it at the end instead of exiting early, in case
423   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
424   bool Result = true;
425   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
426 
427   for (BasicBlock *BB : TheLoop->blocks()) {
428     // Check whether the BB terminator is a BranchInst. Any other terminator is
429     // not supported yet.
430     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
431     if (!Br) {
432       reportVectorizationFailure("Unsupported basic block terminator",
433           "loop control flow is not understood by vectorizer",
434           "CFGNotUnderstood", ORE, TheLoop);
435       if (DoExtraAnalysis)
436         Result = false;
437       else
438         return false;
439     }
440 
441     // Check whether the BranchInst is a supported one. Only unconditional
442     // branches, conditional branches with an outer loop invariant condition or
443     // backedges are supported.
444     // FIXME: We skip these checks when VPlan predication is enabled as we
445     // want to allow divergent branches. This whole check will be removed
446     // once VPlan predication is on by default.
447     if (!EnableVPlanPredication && Br && Br->isConditional() &&
448         !TheLoop->isLoopInvariant(Br->getCondition()) &&
449         !LI->isLoopHeader(Br->getSuccessor(0)) &&
450         !LI->isLoopHeader(Br->getSuccessor(1))) {
451       reportVectorizationFailure("Unsupported conditional branch",
452           "loop control flow is not understood by vectorizer",
453           "CFGNotUnderstood", ORE, TheLoop);
454       if (DoExtraAnalysis)
455         Result = false;
456       else
457         return false;
458     }
459   }
460 
461   // Check whether inner loops are uniform. At this point, we only support
462   // simple outer loops scenarios with uniform nested loops.
463   if (!isUniformLoopNest(TheLoop /*loop nest*/,
464                          TheLoop /*context outer loop*/)) {
465     reportVectorizationFailure("Outer loop contains divergent loops",
466         "loop control flow is not understood by vectorizer",
467         "CFGNotUnderstood", ORE, TheLoop);
468     if (DoExtraAnalysis)
469       Result = false;
470     else
471       return false;
472   }
473 
474   // Check whether we are able to set up outer loop induction.
475   if (!setupOuterLoopInductions()) {
476     reportVectorizationFailure("Unsupported outer loop Phi(s)",
477                                "Unsupported outer loop Phi(s)",
478                                "UnsupportedPhi", ORE, TheLoop);
479     if (DoExtraAnalysis)
480       Result = false;
481     else
482       return false;
483   }
484 
485   return Result;
486 }
487 
488 void LoopVectorizationLegality::addInductionPhi(
489     PHINode *Phi, const InductionDescriptor &ID,
490     SmallPtrSetImpl<Value *> &AllowedExit) {
491   Inductions[Phi] = ID;
492 
493   // In case this induction also comes with casts that we know we can ignore
494   // in the vectorized loop body, record them here. All casts could be recorded
495   // here for ignoring, but suffices to record only the first (as it is the
496   // only one that may bw used outside the cast sequence).
497   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
498   if (!Casts.empty())
499     InductionCastsToIgnore.insert(*Casts.begin());
500 
501   Type *PhiTy = Phi->getType();
502   const DataLayout &DL = Phi->getModule()->getDataLayout();
503 
504   // Get the widest type.
505   if (!PhiTy->isFloatingPointTy()) {
506     if (!WidestIndTy)
507       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
508     else
509       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
510   }
511 
512   // Int inductions are special because we only allow one IV.
513   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
514       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
515       isa<Constant>(ID.getStartValue()) &&
516       cast<Constant>(ID.getStartValue())->isNullValue()) {
517 
518     // Use the phi node with the widest type as induction. Use the last
519     // one if there are multiple (no good reason for doing this other
520     // than it is expedient). We've checked that it begins at zero and
521     // steps by one, so this is a canonical induction variable.
522     if (!PrimaryInduction || PhiTy == WidestIndTy)
523       PrimaryInduction = Phi;
524   }
525 
526   // Both the PHI node itself, and the "post-increment" value feeding
527   // back into the PHI node may have external users.
528   // We can allow those uses, except if the SCEVs we have for them rely
529   // on predicates that only hold within the loop, since allowing the exit
530   // currently means re-using this SCEV outside the loop (see PR33706 for more
531   // details).
532   if (PSE.getUnionPredicate().isAlwaysTrue()) {
533     AllowedExit.insert(Phi);
534     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
535   }
536 
537   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
538 }
539 
540 bool LoopVectorizationLegality::setupOuterLoopInductions() {
541   BasicBlock *Header = TheLoop->getHeader();
542 
543   // Returns true if a given Phi is a supported induction.
544   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
545     InductionDescriptor ID;
546     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
547         ID.getKind() == InductionDescriptor::IK_IntInduction) {
548       addInductionPhi(&Phi, ID, AllowedExit);
549       return true;
550     } else {
551       // Bail out for any Phi in the outer loop header that is not a supported
552       // induction.
553       LLVM_DEBUG(
554           dbgs()
555           << "LV: Found unsupported PHI for outer loop vectorization.\n");
556       return false;
557     }
558   };
559 
560   if (llvm::all_of(Header->phis(), isSupportedPhi))
561     return true;
562   else
563     return false;
564 }
565 
566 bool LoopVectorizationLegality::canVectorizeInstrs() {
567   BasicBlock *Header = TheLoop->getHeader();
568 
569   // Look for the attribute signaling the absence of NaNs.
570   Function &F = *Header->getParent();
571   HasFunNoNaNAttr =
572       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
573 
574   // For each block in the loop.
575   for (BasicBlock *BB : TheLoop->blocks()) {
576     // Scan the instructions in the block and look for hazards.
577     for (Instruction &I : *BB) {
578       if (auto *Phi = dyn_cast<PHINode>(&I)) {
579         Type *PhiTy = Phi->getType();
580         // Check that this PHI type is allowed.
581         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
582             !PhiTy->isPointerTy()) {
583           reportVectorizationFailure("Found a non-int non-pointer PHI",
584                                      "loop control flow is not understood by vectorizer",
585                                      "CFGNotUnderstood", ORE, TheLoop);
586           return false;
587         }
588 
589         // If this PHINode is not in the header block, then we know that we
590         // can convert it to select during if-conversion. No need to check if
591         // the PHIs in this block are induction or reduction variables.
592         if (BB != Header) {
593           // Non-header phi nodes that have outside uses can be vectorized. Add
594           // them to the list of allowed exits.
595           // Unsafe cyclic dependencies with header phis are identified during
596           // legalization for reduction, induction and first order
597           // recurrences.
598           continue;
599         }
600 
601         // We only allow if-converted PHIs with exactly two incoming values.
602         if (Phi->getNumIncomingValues() != 2) {
603           reportVectorizationFailure("Found an invalid PHI",
604               "loop control flow is not understood by vectorizer",
605               "CFGNotUnderstood", ORE, TheLoop, Phi);
606           return false;
607         }
608 
609         RecurrenceDescriptor RedDes;
610         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
611                                                  DT)) {
612           if (RedDes.hasUnsafeAlgebra())
613             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
614           AllowedExit.insert(RedDes.getLoopExitInstr());
615           Reductions[Phi] = RedDes;
616           continue;
617         }
618 
619         // TODO: Instead of recording the AllowedExit, it would be good to record the
620         // complementary set: NotAllowedExit. These include (but may not be
621         // limited to):
622         // 1. Reduction phis as they represent the one-before-last value, which
623         // is not available when vectorized
624         // 2. Induction phis and increment when SCEV predicates cannot be used
625         // outside the loop - see addInductionPhi
626         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
627         // outside the loop - see call to hasOutsideLoopUser in the non-phi
628         // handling below
629         // 4. FirstOrderRecurrence phis that can possibly be handled by
630         // extraction.
631         // By recording these, we can then reason about ways to vectorize each
632         // of these NotAllowedExit.
633         InductionDescriptor ID;
634         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
635           addInductionPhi(Phi, ID, AllowedExit);
636           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
637             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
638           continue;
639         }
640 
641         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
642                                                          SinkAfter, DT)) {
643           FirstOrderRecurrences.insert(Phi);
644           continue;
645         }
646 
647         // As a last resort, coerce the PHI to a AddRec expression
648         // and re-try classifying it a an induction PHI.
649         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
650           addInductionPhi(Phi, ID, AllowedExit);
651           continue;
652         }
653 
654         reportVectorizationFailure("Found an unidentified PHI",
655             "value that could not be identified as "
656             "reduction is used outside the loop",
657             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
658         return false;
659       } // end of PHI handling
660 
661       // We handle calls that:
662       //   * Are debug info intrinsics.
663       //   * Have a mapping to an IR intrinsic.
664       //   * Have a vector version available.
665       auto *CI = dyn_cast<CallInst>(&I);
666       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
667           !isa<DbgInfoIntrinsic>(CI) &&
668           !(CI->getCalledFunction() && TLI &&
669             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
670         // If the call is a recognized math libary call, it is likely that
671         // we can vectorize it given loosened floating-point constraints.
672         LibFunc Func;
673         bool IsMathLibCall =
674             TLI && CI->getCalledFunction() &&
675             CI->getType()->isFloatingPointTy() &&
676             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
677             TLI->hasOptimizedCodeGen(Func);
678 
679         if (IsMathLibCall) {
680           // TODO: Ideally, we should not use clang-specific language here,
681           // but it's hard to provide meaningful yet generic advice.
682           // Also, should this be guarded by allowExtraAnalysis() and/or be part
683           // of the returned info from isFunctionVectorizable()?
684           reportVectorizationFailure("Found a non-intrinsic callsite",
685               "library call cannot be vectorized. "
686               "Try compiling with -fno-math-errno, -ffast-math, "
687               "or similar flags",
688               "CantVectorizeLibcall", ORE, TheLoop, CI);
689         } else {
690           reportVectorizationFailure("Found a non-intrinsic callsite",
691                                      "call instruction cannot be vectorized",
692                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
693         }
694         return false;
695       }
696 
697       // Some intrinsics have scalar arguments and should be same in order for
698       // them to be vectorized (i.e. loop invariant).
699       if (CI) {
700         auto *SE = PSE.getSE();
701         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
702         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
703           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
704             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
705               reportVectorizationFailure("Found unvectorizable intrinsic",
706                   "intrinsic instruction cannot be vectorized",
707                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
708               return false;
709             }
710           }
711       }
712 
713       // Check that the instruction return type is vectorizable.
714       // Also, we can't vectorize extractelement instructions.
715       if ((!VectorType::isValidElementType(I.getType()) &&
716            !I.getType()->isVoidTy()) ||
717           isa<ExtractElementInst>(I)) {
718         reportVectorizationFailure("Found unvectorizable type",
719             "instruction return type cannot be vectorized",
720             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
721         return false;
722       }
723 
724       // Check that the stored type is vectorizable.
725       if (auto *ST = dyn_cast<StoreInst>(&I)) {
726         Type *T = ST->getValueOperand()->getType();
727         if (!VectorType::isValidElementType(T)) {
728           reportVectorizationFailure("Store instruction cannot be vectorized",
729                                      "store instruction cannot be vectorized",
730                                      "CantVectorizeStore", ORE, TheLoop, ST);
731           return false;
732         }
733 
734         // For nontemporal stores, check that a nontemporal vector version is
735         // supported on the target.
736         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
737           // Arbitrarily try a vector of 2 elements.
738           Type *VecTy = VectorType::get(T, /*NumElements=*/2);
739           assert(VecTy && "did not find vectorized version of stored type");
740           unsigned Alignment = getLoadStoreAlignment(ST);
741           if (!TTI->isLegalNTStore(VecTy, Alignment)) {
742             reportVectorizationFailure(
743                 "nontemporal store instruction cannot be vectorized",
744                 "nontemporal store instruction cannot be vectorized",
745                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
746             return false;
747           }
748         }
749 
750       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
751         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
752           // For nontemporal loads, check that a nontemporal vector version is
753           // supported on the target (arbitrarily try a vector of 2 elements).
754           Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
755           assert(VecTy && "did not find vectorized version of load type");
756           unsigned Alignment = getLoadStoreAlignment(LD);
757           if (!TTI->isLegalNTLoad(VecTy, Alignment)) {
758             reportVectorizationFailure(
759                 "nontemporal load instruction cannot be vectorized",
760                 "nontemporal load instruction cannot be vectorized",
761                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
762             return false;
763           }
764         }
765 
766         // FP instructions can allow unsafe algebra, thus vectorizable by
767         // non-IEEE-754 compliant SIMD units.
768         // This applies to floating-point math operations and calls, not memory
769         // operations, shuffles, or casts, as they don't change precision or
770         // semantics.
771       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
772                  !I.isFast()) {
773         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
774         Hints->setPotentiallyUnsafe();
775       }
776 
777       // Reduction instructions are allowed to have exit users.
778       // All other instructions must not have external users.
779       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
780         // We can safely vectorize loops where instructions within the loop are
781         // used outside the loop only if the SCEV predicates within the loop is
782         // same as outside the loop. Allowing the exit means reusing the SCEV
783         // outside the loop.
784         if (PSE.getUnionPredicate().isAlwaysTrue()) {
785           AllowedExit.insert(&I);
786           continue;
787         }
788         reportVectorizationFailure("Value cannot be used outside the loop",
789                                    "value cannot be used outside the loop",
790                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
791         return false;
792       }
793     } // next instr.
794   }
795 
796   if (!PrimaryInduction) {
797     if (Inductions.empty()) {
798       reportVectorizationFailure("Did not find one integer induction var",
799           "loop induction variable could not be identified",
800           "NoInductionVariable", ORE, TheLoop);
801       return false;
802     } else if (!WidestIndTy) {
803       reportVectorizationFailure("Did not find one integer induction var",
804           "integer loop induction variable could not be identified",
805           "NoIntegerInductionVariable", ORE, TheLoop);
806       return false;
807     } else {
808       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
809     }
810   }
811 
812   // Now we know the widest induction type, check if our found induction
813   // is the same size. If it's not, unset it here and InnerLoopVectorizer
814   // will create another.
815   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
816     PrimaryInduction = nullptr;
817 
818   return true;
819 }
820 
821 bool LoopVectorizationLegality::canVectorizeMemory() {
822   LAI = &(*GetLAA)(*TheLoop);
823   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
824   if (LAR) {
825     ORE->emit([&]() {
826       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
827                                         "loop not vectorized: ", *LAR);
828     });
829   }
830   if (!LAI->canVectorizeMemory())
831     return false;
832 
833   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
834     reportVectorizationFailure("Stores to a uniform address",
835         "write to a loop invariant address could not be vectorized",
836         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
837     return false;
838   }
839   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
840   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
841 
842   return true;
843 }
844 
845 bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
846   Value *In0 = const_cast<Value *>(V);
847   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
848   if (!PN)
849     return false;
850 
851   return Inductions.count(PN);
852 }
853 
854 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
855   auto *Inst = dyn_cast<Instruction>(V);
856   return (Inst && InductionCastsToIgnore.count(Inst));
857 }
858 
859 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
860   return isInductionPhi(V) || isCastedInductionVariable(V);
861 }
862 
863 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
864   return FirstOrderRecurrences.count(Phi);
865 }
866 
867 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
868   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
869 }
870 
871 bool LoopVectorizationLegality::blockCanBePredicated(
872     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
873   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
874 
875   for (Instruction &I : *BB) {
876     // Check that we don't have a constant expression that can trap as operand.
877     for (Value *Operand : I.operands()) {
878       if (auto *C = dyn_cast<Constant>(Operand))
879         if (C->canTrap())
880           return false;
881     }
882     // We might be able to hoist the load.
883     if (I.mayReadFromMemory()) {
884       auto *LI = dyn_cast<LoadInst>(&I);
885       if (!LI)
886         return false;
887       if (!SafePtrs.count(LI->getPointerOperand())) {
888         // !llvm.mem.parallel_loop_access implies if-conversion safety.
889         // Otherwise, record that the load needs (real or emulated) masking
890         // and let the cost model decide.
891         if (!IsAnnotatedParallel || PreserveGuards)
892           MaskedOp.insert(LI);
893         continue;
894       }
895     }
896 
897     if (I.mayWriteToMemory()) {
898       auto *SI = dyn_cast<StoreInst>(&I);
899       if (!SI)
900         return false;
901       // Predicated store requires some form of masking:
902       // 1) masked store HW instruction,
903       // 2) emulation via load-blend-store (only if safe and legal to do so,
904       //    be aware on the race conditions), or
905       // 3) element-by-element predicate check and scalar store.
906       MaskedOp.insert(SI);
907       continue;
908     }
909     if (I.mayThrow())
910       return false;
911   }
912 
913   return true;
914 }
915 
916 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
917   if (!EnableIfConversion) {
918     reportVectorizationFailure("If-conversion is disabled",
919                                "if-conversion is disabled",
920                                "IfConversionDisabled",
921                                ORE, TheLoop);
922     return false;
923   }
924 
925   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
926 
927   // A list of pointers that we can safely read and write to.
928   SmallPtrSet<Value *, 8> SafePointes;
929 
930   // Collect safe addresses.
931   for (BasicBlock *BB : TheLoop->blocks()) {
932     if (blockNeedsPredication(BB))
933       continue;
934 
935     for (Instruction &I : *BB)
936       if (auto *Ptr = getLoadStorePointerOperand(&I))
937         SafePointes.insert(Ptr);
938   }
939 
940   // Collect the blocks that need predication.
941   BasicBlock *Header = TheLoop->getHeader();
942   for (BasicBlock *BB : TheLoop->blocks()) {
943     // We don't support switch statements inside loops.
944     if (!isa<BranchInst>(BB->getTerminator())) {
945       reportVectorizationFailure("Loop contains a switch statement",
946                                  "loop contains a switch statement",
947                                  "LoopContainsSwitch", ORE, TheLoop,
948                                  BB->getTerminator());
949       return false;
950     }
951 
952     // We must be able to predicate all blocks that need to be predicated.
953     if (blockNeedsPredication(BB)) {
954       if (!blockCanBePredicated(BB, SafePointes)) {
955         reportVectorizationFailure(
956             "Control flow cannot be substituted for a select",
957             "control flow cannot be substituted for a select",
958             "NoCFGForSelect", ORE, TheLoop,
959             BB->getTerminator());
960         return false;
961       }
962     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
963       reportVectorizationFailure(
964           "Control flow cannot be substituted for a select",
965           "control flow cannot be substituted for a select",
966           "NoCFGForSelect", ORE, TheLoop,
967           BB->getTerminator());
968       return false;
969     }
970   }
971 
972   // We can if-convert this loop.
973   return true;
974 }
975 
976 // Helper function to canVectorizeLoopNestCFG.
977 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
978                                                     bool UseVPlanNativePath) {
979   assert((UseVPlanNativePath || Lp->empty()) &&
980          "VPlan-native path is not enabled.");
981 
982   // TODO: ORE should be improved to show more accurate information when an
983   // outer loop can't be vectorized because a nested loop is not understood or
984   // legal. Something like: "outer_loop_location: loop not vectorized:
985   // (inner_loop_location) loop control flow is not understood by vectorizer".
986 
987   // Store the result and return it at the end instead of exiting early, in case
988   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
989   bool Result = true;
990   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
991 
992   // We must have a loop in canonical form. Loops with indirectbr in them cannot
993   // be canonicalized.
994   if (!Lp->getLoopPreheader()) {
995     reportVectorizationFailure("Loop doesn't have a legal pre-header",
996         "loop control flow is not understood by vectorizer",
997         "CFGNotUnderstood", ORE, TheLoop);
998     if (DoExtraAnalysis)
999       Result = false;
1000     else
1001       return false;
1002   }
1003 
1004   // We must have a single backedge.
1005   if (Lp->getNumBackEdges() != 1) {
1006     reportVectorizationFailure("The loop must have a single backedge",
1007         "loop control flow is not understood by vectorizer",
1008         "CFGNotUnderstood", ORE, TheLoop);
1009     if (DoExtraAnalysis)
1010       Result = false;
1011     else
1012       return false;
1013   }
1014 
1015   // We must have a single exiting block.
1016   if (!Lp->getExitingBlock()) {
1017     reportVectorizationFailure("The loop must have an exiting block",
1018         "loop control flow is not understood by vectorizer",
1019         "CFGNotUnderstood", ORE, TheLoop);
1020     if (DoExtraAnalysis)
1021       Result = false;
1022     else
1023       return false;
1024   }
1025 
1026   // We only handle bottom-tested loops, i.e. loop in which the condition is
1027   // checked at the end of each iteration. With that we can assume that all
1028   // instructions in the loop are executed the same number of times.
1029   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1030     reportVectorizationFailure("The exiting block is not the loop latch",
1031         "loop control flow is not understood by vectorizer",
1032         "CFGNotUnderstood", ORE, TheLoop);
1033     if (DoExtraAnalysis)
1034       Result = false;
1035     else
1036       return false;
1037   }
1038 
1039   return Result;
1040 }
1041 
1042 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1043     Loop *Lp, bool UseVPlanNativePath) {
1044   // Store the result and return it at the end instead of exiting early, in case
1045   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1046   bool Result = true;
1047   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1048   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1049     if (DoExtraAnalysis)
1050       Result = false;
1051     else
1052       return false;
1053   }
1054 
1055   // Recursively check whether the loop control flow of nested loops is
1056   // understood.
1057   for (Loop *SubLp : *Lp)
1058     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1059       if (DoExtraAnalysis)
1060         Result = false;
1061       else
1062         return false;
1063     }
1064 
1065   return Result;
1066 }
1067 
1068 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1069   // Store the result and return it at the end instead of exiting early, in case
1070   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1071   bool Result = true;
1072 
1073   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1074   // Check whether the loop-related control flow in the loop nest is expected by
1075   // vectorizer.
1076   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1077     if (DoExtraAnalysis)
1078       Result = false;
1079     else
1080       return false;
1081   }
1082 
1083   // We need to have a loop header.
1084   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1085                     << '\n');
1086 
1087   // Specific checks for outer loops. We skip the remaining legal checks at this
1088   // point because they don't support outer loops.
1089   if (!TheLoop->empty()) {
1090     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1091 
1092     if (!canVectorizeOuterLoop()) {
1093       reportVectorizationFailure("Unsupported outer loop",
1094                                  "unsupported outer loop",
1095                                  "UnsupportedOuterLoop",
1096                                  ORE, TheLoop);
1097       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1098       // outer loops.
1099       return false;
1100     }
1101 
1102     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1103     return Result;
1104   }
1105 
1106   assert(TheLoop->empty() && "Inner loop expected.");
1107   // Check if we can if-convert non-single-bb loops.
1108   unsigned NumBlocks = TheLoop->getNumBlocks();
1109   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1110     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1111     if (DoExtraAnalysis)
1112       Result = false;
1113     else
1114       return false;
1115   }
1116 
1117   // Check if we can vectorize the instructions and CFG in this loop.
1118   if (!canVectorizeInstrs()) {
1119     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1120     if (DoExtraAnalysis)
1121       Result = false;
1122     else
1123       return false;
1124   }
1125 
1126   // Go over each instruction and look at memory deps.
1127   if (!canVectorizeMemory()) {
1128     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1129     if (DoExtraAnalysis)
1130       Result = false;
1131     else
1132       return false;
1133   }
1134 
1135   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1136                     << (LAI->getRuntimePointerChecking()->Need
1137                             ? " (with a runtime bound check)"
1138                             : "")
1139                     << "!\n");
1140 
1141   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1142   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1143     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1144 
1145   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1146     reportVectorizationFailure("Too many SCEV checks needed",
1147         "Too many SCEV assumptions need to be made and checked at runtime",
1148         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1149     if (DoExtraAnalysis)
1150       Result = false;
1151     else
1152       return false;
1153   }
1154 
1155   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1156   // we can vectorize, and at this point we don't have any other mem analysis
1157   // which may limit our maximum vectorization factor, so just return true with
1158   // no restrictions.
1159   return Result;
1160 }
1161 
1162 bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1163 
1164   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1165 
1166   if (!PrimaryInduction) {
1167     reportVectorizationFailure(
1168         "No primary induction, cannot fold tail by masking",
1169         "Missing a primary induction variable in the loop, which is "
1170         "needed in order to fold tail by masking as required.",
1171         "NoPrimaryInduction", ORE, TheLoop);
1172     return false;
1173   }
1174 
1175   // TODO: handle reductions when tail is folded by masking.
1176   if (!Reductions.empty()) {
1177     reportVectorizationFailure(
1178         "Loop has reductions, cannot fold tail by masking",
1179         "Cannot fold tail by masking in the presence of reductions.",
1180         "ReductionFoldingTailByMasking", ORE, TheLoop);
1181     return false;
1182   }
1183 
1184   // TODO: handle outside users when tail is folded by masking.
1185   for (auto *AE : AllowedExit) {
1186     // Check that all users of allowed exit values are inside the loop.
1187     for (User *U : AE->users()) {
1188       Instruction *UI = cast<Instruction>(U);
1189       if (TheLoop->contains(UI))
1190         continue;
1191       reportVectorizationFailure(
1192           "Cannot fold tail by masking, loop has an outside user for",
1193           "Cannot fold tail by masking in the presence of live outs.",
1194           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1195       return false;
1196     }
1197   }
1198 
1199   // The list of pointers that we can safely read and write to remains empty.
1200   SmallPtrSet<Value *, 8> SafePointers;
1201 
1202   // Check and mark all blocks for predication, including those that ordinarily
1203   // do not need predication such as the header block.
1204   for (BasicBlock *BB : TheLoop->blocks()) {
1205     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1206       reportVectorizationFailure(
1207           "Cannot fold tail by masking as required",
1208           "control flow cannot be substituted for a select",
1209           "NoCFGForSelect", ORE, TheLoop,
1210           BB->getTerminator());
1211       return false;
1212     }
1213   }
1214 
1215   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1216   return true;
1217 }
1218 
1219 } // namespace llvm
1220