1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
16 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
17 #include "llvm/Analysis/Loads.h"
18 #include "llvm/Analysis/ValueTracking.h"
19 #include "llvm/Analysis/VectorUtils.h"
20 #include "llvm/IR/IntrinsicInst.h"
21 #include "llvm/IR/PatternMatch.h"
22 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
23 
24 using namespace llvm;
25 using namespace PatternMatch;
26 
27 #define LV_NAME "loop-vectorize"
28 #define DEBUG_TYPE LV_NAME
29 
30 extern cl::opt<bool> EnableVPlanPredication;
31 
32 static cl::opt<bool>
33     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
34                        cl::desc("Enable if-conversion during vectorization."));
35 
36 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
37     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
38     cl::desc("The maximum allowed number of runtime memory checks with a "
39              "vectorize(enable) pragma."));
40 
41 static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
42     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
43     cl::desc("The maximum number of SCEV checks allowed."));
44 
45 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
46     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
47     cl::desc("The maximum number of SCEV checks allowed with a "
48              "vectorize(enable) pragma"));
49 
50 /// Maximum vectorization interleave count.
51 static const unsigned MaxInterleaveFactor = 16;
52 
53 namespace llvm {
54 
55 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
56   switch (Kind) {
57   case HK_WIDTH:
58     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
59   case HK_UNROLL:
60     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
61   case HK_FORCE:
62     return (Val <= 1);
63   case HK_ISVECTORIZED:
64   case HK_PREDICATE:
65     return (Val == 0 || Val == 1);
66   }
67   return false;
68 }
69 
70 LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
71                                        bool InterleaveOnlyWhenForced,
72                                        OptimizationRemarkEmitter &ORE)
73     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
74       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
75       Force("vectorize.enable", FK_Undefined, HK_FORCE),
76       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
77       Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
78       ORE(ORE) {
79   // Populate values with existing loop metadata.
80   getHintsFromMetadata();
81 
82   // force-vector-interleave overrides DisableInterleaving.
83   if (VectorizerParams::isInterleaveForced())
84     Interleave.Value = VectorizerParams::VectorizationInterleave;
85 
86   if (IsVectorized.Value != 1)
87     // If the vectorization width and interleaving count are both 1 then
88     // consider the loop to have been already vectorized because there's
89     // nothing more that we can do.
90     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
91   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
92              << "LV: Interleaving disabled by the pass manager\n");
93 }
94 
95 void LoopVectorizeHints::setAlreadyVectorized() {
96   LLVMContext &Context = TheLoop->getHeader()->getContext();
97 
98   MDNode *IsVectorizedMD = MDNode::get(
99       Context,
100       {MDString::get(Context, "llvm.loop.isvectorized"),
101        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
102   MDNode *LoopID = TheLoop->getLoopID();
103   MDNode *NewLoopID =
104       makePostTransformationMetadata(Context, LoopID,
105                                      {Twine(Prefix(), "vectorize.").str(),
106                                       Twine(Prefix(), "interleave.").str()},
107                                      {IsVectorizedMD});
108   TheLoop->setLoopID(NewLoopID);
109 
110   // Update internal cache.
111   IsVectorized.Value = 1;
112 }
113 
114 bool LoopVectorizeHints::allowVectorization(
115     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
116   if (getForce() == LoopVectorizeHints::FK_Disabled) {
117     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
118     emitRemarkWithHints();
119     return false;
120   }
121 
122   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
123     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
124     emitRemarkWithHints();
125     return false;
126   }
127 
128   if (getIsVectorized() == 1) {
129     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
130     // FIXME: Add interleave.disable metadata. This will allow
131     // vectorize.disable to be used without disabling the pass and errors
132     // to differentiate between disabled vectorization and a width of 1.
133     ORE.emit([&]() {
134       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
135                                         "AllDisabled", L->getStartLoc(),
136                                         L->getHeader())
137              << "loop not vectorized: vectorization and interleaving are "
138                 "explicitly disabled, or the loop has already been "
139                 "vectorized";
140     });
141     return false;
142   }
143 
144   return true;
145 }
146 
147 void LoopVectorizeHints::emitRemarkWithHints() const {
148   using namespace ore;
149 
150   ORE.emit([&]() {
151     if (Force.Value == LoopVectorizeHints::FK_Disabled)
152       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
153                                       TheLoop->getStartLoc(),
154                                       TheLoop->getHeader())
155              << "loop not vectorized: vectorization is explicitly disabled";
156     else {
157       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
158                                  TheLoop->getStartLoc(), TheLoop->getHeader());
159       R << "loop not vectorized";
160       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
161         R << " (Force=" << NV("Force", true);
162         if (Width.Value != 0)
163           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
164         if (Interleave.Value != 0)
165           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
166         R << ")";
167       }
168       return R;
169     }
170   });
171 }
172 
173 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
174   if (getWidth() == 1)
175     return LV_NAME;
176   if (getForce() == LoopVectorizeHints::FK_Disabled)
177     return LV_NAME;
178   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
179     return LV_NAME;
180   return OptimizationRemarkAnalysis::AlwaysPrint;
181 }
182 
183 void LoopVectorizeHints::getHintsFromMetadata() {
184   MDNode *LoopID = TheLoop->getLoopID();
185   if (!LoopID)
186     return;
187 
188   // First operand should refer to the loop id itself.
189   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
190   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
191 
192   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
193     const MDString *S = nullptr;
194     SmallVector<Metadata *, 4> Args;
195 
196     // The expected hint is either a MDString or a MDNode with the first
197     // operand a MDString.
198     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
199       if (!MD || MD->getNumOperands() == 0)
200         continue;
201       S = dyn_cast<MDString>(MD->getOperand(0));
202       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
203         Args.push_back(MD->getOperand(i));
204     } else {
205       S = dyn_cast<MDString>(LoopID->getOperand(i));
206       assert(Args.size() == 0 && "too many arguments for MDString");
207     }
208 
209     if (!S)
210       continue;
211 
212     // Check if the hint starts with the loop metadata prefix.
213     StringRef Name = S->getString();
214     if (Args.size() == 1)
215       setHint(Name, Args[0]);
216   }
217 }
218 
219 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
220   if (!Name.startswith(Prefix()))
221     return;
222   Name = Name.substr(Prefix().size(), StringRef::npos);
223 
224   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
225   if (!C)
226     return;
227   unsigned Val = C->getZExtValue();
228 
229   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
230   for (auto H : Hints) {
231     if (Name == H->Name) {
232       if (H->validate(Val))
233         H->Value = Val;
234       else
235         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
236       break;
237     }
238   }
239 }
240 
241 bool LoopVectorizationRequirements::doesNotMeet(
242     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
243   const char *PassName = Hints.vectorizeAnalysisPassName();
244   bool Failed = false;
245   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
246     ORE.emit([&]() {
247       return OptimizationRemarkAnalysisFPCommute(
248                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
249                  UnsafeAlgebraInst->getParent())
250              << "loop not vectorized: cannot prove it is safe to reorder "
251                 "floating-point operations";
252     });
253     Failed = true;
254   }
255 
256   // Test if runtime memcheck thresholds are exceeded.
257   bool PragmaThresholdReached =
258       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
259   bool ThresholdReached =
260       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
261   if ((ThresholdReached && !Hints.allowReordering()) ||
262       PragmaThresholdReached) {
263     ORE.emit([&]() {
264       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
265                                                 L->getStartLoc(),
266                                                 L->getHeader())
267              << "loop not vectorized: cannot prove it is safe to reorder "
268                 "memory operations";
269     });
270     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
271     Failed = true;
272   }
273 
274   return Failed;
275 }
276 
277 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
278 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
279 // executing the inner loop will execute the same iterations). This check is
280 // very constrained for now but it will be relaxed in the future. \p Lp is
281 // considered uniform if it meets all the following conditions:
282 //   1) it has a canonical IV (starting from 0 and with stride 1),
283 //   2) its latch terminator is a conditional branch and,
284 //   3) its latch condition is a compare instruction whose operands are the
285 //      canonical IV and an OuterLp invariant.
286 // This check doesn't take into account the uniformity of other conditions not
287 // related to the loop latch because they don't affect the loop uniformity.
288 //
289 // NOTE: We decided to keep all these checks and its associated documentation
290 // together so that we can easily have a picture of the current supported loop
291 // nests. However, some of the current checks don't depend on \p OuterLp and
292 // would be redundantly executed for each \p Lp if we invoked this function for
293 // different candidate outer loops. This is not the case for now because we
294 // don't currently have the infrastructure to evaluate multiple candidate outer
295 // loops and \p OuterLp will be a fixed parameter while we only support explicit
296 // outer loop vectorization. It's also very likely that these checks go away
297 // before introducing the aforementioned infrastructure. However, if this is not
298 // the case, we should move the \p OuterLp independent checks to a separate
299 // function that is only executed once for each \p Lp.
300 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
301   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
302 
303   // If Lp is the outer loop, it's uniform by definition.
304   if (Lp == OuterLp)
305     return true;
306   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
307 
308   // 1.
309   PHINode *IV = Lp->getCanonicalInductionVariable();
310   if (!IV) {
311     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
312     return false;
313   }
314 
315   // 2.
316   BasicBlock *Latch = Lp->getLoopLatch();
317   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
318   if (!LatchBr || LatchBr->isUnconditional()) {
319     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
320     return false;
321   }
322 
323   // 3.
324   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
325   if (!LatchCmp) {
326     LLVM_DEBUG(
327         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
328     return false;
329   }
330 
331   Value *CondOp0 = LatchCmp->getOperand(0);
332   Value *CondOp1 = LatchCmp->getOperand(1);
333   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
334   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
335       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
336     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
337     return false;
338   }
339 
340   return true;
341 }
342 
343 // Return true if \p Lp and all its nested loops are uniform with regard to \p
344 // OuterLp.
345 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
346   if (!isUniformLoop(Lp, OuterLp))
347     return false;
348 
349   // Check if nested loops are uniform.
350   for (Loop *SubLp : *Lp)
351     if (!isUniformLoopNest(SubLp, OuterLp))
352       return false;
353 
354   return true;
355 }
356 
357 /// Check whether it is safe to if-convert this phi node.
358 ///
359 /// Phi nodes with constant expressions that can trap are not safe to if
360 /// convert.
361 static bool canIfConvertPHINodes(BasicBlock *BB) {
362   for (PHINode &Phi : BB->phis()) {
363     for (Value *V : Phi.incoming_values())
364       if (auto *C = dyn_cast<Constant>(V))
365         if (C->canTrap())
366           return false;
367   }
368   return true;
369 }
370 
371 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
372   if (Ty->isPointerTy())
373     return DL.getIntPtrType(Ty);
374 
375   // It is possible that char's or short's overflow when we ask for the loop's
376   // trip count, work around this by changing the type size.
377   if (Ty->getScalarSizeInBits() < 32)
378     return Type::getInt32Ty(Ty->getContext());
379 
380   return Ty;
381 }
382 
383 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
384   Ty0 = convertPointerToIntegerType(DL, Ty0);
385   Ty1 = convertPointerToIntegerType(DL, Ty1);
386   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
387     return Ty0;
388   return Ty1;
389 }
390 
391 /// Check that the instruction has outside loop users and is not an
392 /// identified reduction variable.
393 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
394                                SmallPtrSetImpl<Value *> &AllowedExit) {
395   // Reductions, Inductions and non-header phis are allowed to have exit users. All
396   // other instructions must not have external users.
397   if (!AllowedExit.count(Inst))
398     // Check that all of the users of the loop are inside the BB.
399     for (User *U : Inst->users()) {
400       Instruction *UI = cast<Instruction>(U);
401       // This user may be a reduction exit value.
402       if (!TheLoop->contains(UI)) {
403         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
404         return true;
405       }
406     }
407   return false;
408 }
409 
410 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
411   const ValueToValueMap &Strides =
412       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
413 
414   bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();
415   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
416   if (Stride == 1 || Stride == -1)
417     return Stride;
418   return 0;
419 }
420 
421 bool LoopVectorizationLegality::isUniform(Value *V) {
422   return LAI->isUniform(V);
423 }
424 
425 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
426   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
427   // Store the result and return it at the end instead of exiting early, in case
428   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
429   bool Result = true;
430   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
431 
432   for (BasicBlock *BB : TheLoop->blocks()) {
433     // Check whether the BB terminator is a BranchInst. Any other terminator is
434     // not supported yet.
435     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
436     if (!Br) {
437       reportVectorizationFailure("Unsupported basic block terminator",
438           "loop control flow is not understood by vectorizer",
439           "CFGNotUnderstood", ORE, TheLoop);
440       if (DoExtraAnalysis)
441         Result = false;
442       else
443         return false;
444     }
445 
446     // Check whether the BranchInst is a supported one. Only unconditional
447     // branches, conditional branches with an outer loop invariant condition or
448     // backedges are supported.
449     // FIXME: We skip these checks when VPlan predication is enabled as we
450     // want to allow divergent branches. This whole check will be removed
451     // once VPlan predication is on by default.
452     if (!EnableVPlanPredication && Br && Br->isConditional() &&
453         !TheLoop->isLoopInvariant(Br->getCondition()) &&
454         !LI->isLoopHeader(Br->getSuccessor(0)) &&
455         !LI->isLoopHeader(Br->getSuccessor(1))) {
456       reportVectorizationFailure("Unsupported conditional branch",
457           "loop control flow is not understood by vectorizer",
458           "CFGNotUnderstood", ORE, TheLoop);
459       if (DoExtraAnalysis)
460         Result = false;
461       else
462         return false;
463     }
464   }
465 
466   // Check whether inner loops are uniform. At this point, we only support
467   // simple outer loops scenarios with uniform nested loops.
468   if (!isUniformLoopNest(TheLoop /*loop nest*/,
469                          TheLoop /*context outer loop*/)) {
470     reportVectorizationFailure("Outer loop contains divergent loops",
471         "loop control flow is not understood by vectorizer",
472         "CFGNotUnderstood", ORE, TheLoop);
473     if (DoExtraAnalysis)
474       Result = false;
475     else
476       return false;
477   }
478 
479   // Check whether we are able to set up outer loop induction.
480   if (!setupOuterLoopInductions()) {
481     reportVectorizationFailure("Unsupported outer loop Phi(s)",
482                                "Unsupported outer loop Phi(s)",
483                                "UnsupportedPhi", ORE, TheLoop);
484     if (DoExtraAnalysis)
485       Result = false;
486     else
487       return false;
488   }
489 
490   return Result;
491 }
492 
493 void LoopVectorizationLegality::addInductionPhi(
494     PHINode *Phi, const InductionDescriptor &ID,
495     SmallPtrSetImpl<Value *> &AllowedExit) {
496   Inductions[Phi] = ID;
497 
498   // In case this induction also comes with casts that we know we can ignore
499   // in the vectorized loop body, record them here. All casts could be recorded
500   // here for ignoring, but suffices to record only the first (as it is the
501   // only one that may bw used outside the cast sequence).
502   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
503   if (!Casts.empty())
504     InductionCastsToIgnore.insert(*Casts.begin());
505 
506   Type *PhiTy = Phi->getType();
507   const DataLayout &DL = Phi->getModule()->getDataLayout();
508 
509   // Get the widest type.
510   if (!PhiTy->isFloatingPointTy()) {
511     if (!WidestIndTy)
512       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
513     else
514       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
515   }
516 
517   // Int inductions are special because we only allow one IV.
518   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
519       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
520       isa<Constant>(ID.getStartValue()) &&
521       cast<Constant>(ID.getStartValue())->isNullValue()) {
522 
523     // Use the phi node with the widest type as induction. Use the last
524     // one if there are multiple (no good reason for doing this other
525     // than it is expedient). We've checked that it begins at zero and
526     // steps by one, so this is a canonical induction variable.
527     if (!PrimaryInduction || PhiTy == WidestIndTy)
528       PrimaryInduction = Phi;
529   }
530 
531   // Both the PHI node itself, and the "post-increment" value feeding
532   // back into the PHI node may have external users.
533   // We can allow those uses, except if the SCEVs we have for them rely
534   // on predicates that only hold within the loop, since allowing the exit
535   // currently means re-using this SCEV outside the loop (see PR33706 for more
536   // details).
537   if (PSE.getUnionPredicate().isAlwaysTrue()) {
538     AllowedExit.insert(Phi);
539     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
540   }
541 
542   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
543 }
544 
545 bool LoopVectorizationLegality::setupOuterLoopInductions() {
546   BasicBlock *Header = TheLoop->getHeader();
547 
548   // Returns true if a given Phi is a supported induction.
549   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
550     InductionDescriptor ID;
551     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
552         ID.getKind() == InductionDescriptor::IK_IntInduction) {
553       addInductionPhi(&Phi, ID, AllowedExit);
554       return true;
555     } else {
556       // Bail out for any Phi in the outer loop header that is not a supported
557       // induction.
558       LLVM_DEBUG(
559           dbgs()
560           << "LV: Found unsupported PHI for outer loop vectorization.\n");
561       return false;
562     }
563   };
564 
565   if (llvm::all_of(Header->phis(), isSupportedPhi))
566     return true;
567   else
568     return false;
569 }
570 
571 /// Checks if a function is scalarizable according to the TLI, in
572 /// the sense that it should be vectorized and then expanded in
573 /// multiple scalarcalls. This is represented in the
574 /// TLI via mappings that do not specify a vector name, as in the
575 /// following example:
576 ///
577 ///    const VecDesc VecIntrinsics[] = {
578 ///      {"llvm.phx.abs.i32", "", 4}
579 ///    };
580 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
581   const StringRef ScalarName = CI.getCalledFunction()->getName();
582   bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
583   // Check that all known VFs are not associated to a vector
584   // function, i.e. the vector name is emty.
585   if (Scalarize)
586     for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName);
587          VF <= WidestVF; VF *= 2) {
588       Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
589     }
590   return Scalarize;
591 }
592 
593 bool LoopVectorizationLegality::canVectorizeInstrs() {
594   BasicBlock *Header = TheLoop->getHeader();
595 
596   // Look for the attribute signaling the absence of NaNs.
597   Function &F = *Header->getParent();
598   HasFunNoNaNAttr =
599       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
600 
601   // For each block in the loop.
602   for (BasicBlock *BB : TheLoop->blocks()) {
603     // Scan the instructions in the block and look for hazards.
604     for (Instruction &I : *BB) {
605       if (auto *Phi = dyn_cast<PHINode>(&I)) {
606         Type *PhiTy = Phi->getType();
607         // Check that this PHI type is allowed.
608         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
609             !PhiTy->isPointerTy()) {
610           reportVectorizationFailure("Found a non-int non-pointer PHI",
611                                      "loop control flow is not understood by vectorizer",
612                                      "CFGNotUnderstood", ORE, TheLoop);
613           return false;
614         }
615 
616         // If this PHINode is not in the header block, then we know that we
617         // can convert it to select during if-conversion. No need to check if
618         // the PHIs in this block are induction or reduction variables.
619         if (BB != Header) {
620           // Non-header phi nodes that have outside uses can be vectorized. Add
621           // them to the list of allowed exits.
622           // Unsafe cyclic dependencies with header phis are identified during
623           // legalization for reduction, induction and first order
624           // recurrences.
625           AllowedExit.insert(&I);
626           continue;
627         }
628 
629         // We only allow if-converted PHIs with exactly two incoming values.
630         if (Phi->getNumIncomingValues() != 2) {
631           reportVectorizationFailure("Found an invalid PHI",
632               "loop control flow is not understood by vectorizer",
633               "CFGNotUnderstood", ORE, TheLoop, Phi);
634           return false;
635         }
636 
637         RecurrenceDescriptor RedDes;
638         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
639                                                  DT)) {
640           if (RedDes.hasUnsafeAlgebra())
641             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
642           AllowedExit.insert(RedDes.getLoopExitInstr());
643           Reductions[Phi] = RedDes;
644           continue;
645         }
646 
647         // TODO: Instead of recording the AllowedExit, it would be good to record the
648         // complementary set: NotAllowedExit. These include (but may not be
649         // limited to):
650         // 1. Reduction phis as they represent the one-before-last value, which
651         // is not available when vectorized
652         // 2. Induction phis and increment when SCEV predicates cannot be used
653         // outside the loop - see addInductionPhi
654         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
655         // outside the loop - see call to hasOutsideLoopUser in the non-phi
656         // handling below
657         // 4. FirstOrderRecurrence phis that can possibly be handled by
658         // extraction.
659         // By recording these, we can then reason about ways to vectorize each
660         // of these NotAllowedExit.
661         InductionDescriptor ID;
662         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
663           addInductionPhi(Phi, ID, AllowedExit);
664           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
665             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
666           continue;
667         }
668 
669         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
670                                                          SinkAfter, DT)) {
671           AllowedExit.insert(Phi);
672           FirstOrderRecurrences.insert(Phi);
673           continue;
674         }
675 
676         // As a last resort, coerce the PHI to a AddRec expression
677         // and re-try classifying it a an induction PHI.
678         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
679           addInductionPhi(Phi, ID, AllowedExit);
680           continue;
681         }
682 
683         reportVectorizationFailure("Found an unidentified PHI",
684             "value that could not be identified as "
685             "reduction is used outside the loop",
686             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
687         return false;
688       } // end of PHI handling
689 
690       // We handle calls that:
691       //   * Are debug info intrinsics.
692       //   * Have a mapping to an IR intrinsic.
693       //   * Have a vector version available.
694       auto *CI = dyn_cast<CallInst>(&I);
695 
696       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
697           !isa<DbgInfoIntrinsic>(CI) &&
698           !(CI->getCalledFunction() && TLI &&
699             (!VFDatabase::getMappings(*CI).empty() ||
700              isTLIScalarize(*TLI, *CI)))) {
701         // If the call is a recognized math libary call, it is likely that
702         // we can vectorize it given loosened floating-point constraints.
703         LibFunc Func;
704         bool IsMathLibCall =
705             TLI && CI->getCalledFunction() &&
706             CI->getType()->isFloatingPointTy() &&
707             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
708             TLI->hasOptimizedCodeGen(Func);
709 
710         if (IsMathLibCall) {
711           // TODO: Ideally, we should not use clang-specific language here,
712           // but it's hard to provide meaningful yet generic advice.
713           // Also, should this be guarded by allowExtraAnalysis() and/or be part
714           // of the returned info from isFunctionVectorizable()?
715           reportVectorizationFailure(
716               "Found a non-intrinsic callsite",
717               "library call cannot be vectorized. "
718               "Try compiling with -fno-math-errno, -ffast-math, "
719               "or similar flags",
720               "CantVectorizeLibcall", ORE, TheLoop, CI);
721         } else {
722           reportVectorizationFailure("Found a non-intrinsic callsite",
723                                      "call instruction cannot be vectorized",
724                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
725         }
726         return false;
727       }
728 
729       // Some intrinsics have scalar arguments and should be same in order for
730       // them to be vectorized (i.e. loop invariant).
731       if (CI) {
732         auto *SE = PSE.getSE();
733         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
734         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
735           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
736             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
737               reportVectorizationFailure("Found unvectorizable intrinsic",
738                   "intrinsic instruction cannot be vectorized",
739                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
740               return false;
741             }
742           }
743       }
744 
745       // Check that the instruction return type is vectorizable.
746       // Also, we can't vectorize extractelement instructions.
747       if ((!VectorType::isValidElementType(I.getType()) &&
748            !I.getType()->isVoidTy()) ||
749           isa<ExtractElementInst>(I)) {
750         reportVectorizationFailure("Found unvectorizable type",
751             "instruction return type cannot be vectorized",
752             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
753         return false;
754       }
755 
756       // Check that the stored type is vectorizable.
757       if (auto *ST = dyn_cast<StoreInst>(&I)) {
758         Type *T = ST->getValueOperand()->getType();
759         if (!VectorType::isValidElementType(T)) {
760           reportVectorizationFailure("Store instruction cannot be vectorized",
761                                      "store instruction cannot be vectorized",
762                                      "CantVectorizeStore", ORE, TheLoop, ST);
763           return false;
764         }
765 
766         // For nontemporal stores, check that a nontemporal vector version is
767         // supported on the target.
768         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
769           // Arbitrarily try a vector of 2 elements.
770           Type *VecTy = VectorType::get(T, /*NumElements=*/2);
771           assert(VecTy && "did not find vectorized version of stored type");
772           const MaybeAlign Alignment = getLoadStoreAlignment(ST);
773           assert(Alignment && "Alignment should be set");
774           if (!TTI->isLegalNTStore(VecTy, *Alignment)) {
775             reportVectorizationFailure(
776                 "nontemporal store instruction cannot be vectorized",
777                 "nontemporal store instruction cannot be vectorized",
778                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
779             return false;
780           }
781         }
782 
783       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
784         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
785           // For nontemporal loads, check that a nontemporal vector version is
786           // supported on the target (arbitrarily try a vector of 2 elements).
787           Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
788           assert(VecTy && "did not find vectorized version of load type");
789           const MaybeAlign Alignment = getLoadStoreAlignment(LD);
790           assert(Alignment && "Alignment should be set");
791           if (!TTI->isLegalNTLoad(VecTy, *Alignment)) {
792             reportVectorizationFailure(
793                 "nontemporal load instruction cannot be vectorized",
794                 "nontemporal load instruction cannot be vectorized",
795                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
796             return false;
797           }
798         }
799 
800         // FP instructions can allow unsafe algebra, thus vectorizable by
801         // non-IEEE-754 compliant SIMD units.
802         // This applies to floating-point math operations and calls, not memory
803         // operations, shuffles, or casts, as they don't change precision or
804         // semantics.
805       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
806                  !I.isFast()) {
807         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
808         Hints->setPotentiallyUnsafe();
809       }
810 
811       // Reduction instructions are allowed to have exit users.
812       // All other instructions must not have external users.
813       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
814         // We can safely vectorize loops where instructions within the loop are
815         // used outside the loop only if the SCEV predicates within the loop is
816         // same as outside the loop. Allowing the exit means reusing the SCEV
817         // outside the loop.
818         if (PSE.getUnionPredicate().isAlwaysTrue()) {
819           AllowedExit.insert(&I);
820           continue;
821         }
822         reportVectorizationFailure("Value cannot be used outside the loop",
823                                    "value cannot be used outside the loop",
824                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
825         return false;
826       }
827     } // next instr.
828   }
829 
830   if (!PrimaryInduction) {
831     if (Inductions.empty()) {
832       reportVectorizationFailure("Did not find one integer induction var",
833           "loop induction variable could not be identified",
834           "NoInductionVariable", ORE, TheLoop);
835       return false;
836     } else if (!WidestIndTy) {
837       reportVectorizationFailure("Did not find one integer induction var",
838           "integer loop induction variable could not be identified",
839           "NoIntegerInductionVariable", ORE, TheLoop);
840       return false;
841     } else {
842       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
843     }
844   }
845 
846   // For first order recurrences, we use the previous value (incoming value from
847   // the latch) to check if it dominates all users of the recurrence. Bail out
848   // if we have to sink such an instruction for another recurrence, as the
849   // dominance requirement may not hold after sinking.
850   BasicBlock *LoopLatch = TheLoop->getLoopLatch();
851   if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
852         Instruction *V =
853             cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
854         return SinkAfter.find(V) != SinkAfter.end();
855       }))
856     return false;
857 
858   // Now we know the widest induction type, check if our found induction
859   // is the same size. If it's not, unset it here and InnerLoopVectorizer
860   // will create another.
861   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
862     PrimaryInduction = nullptr;
863 
864   return true;
865 }
866 
867 bool LoopVectorizationLegality::canVectorizeMemory() {
868   LAI = &(*GetLAA)(*TheLoop);
869   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
870   if (LAR) {
871     ORE->emit([&]() {
872       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
873                                         "loop not vectorized: ", *LAR);
874     });
875   }
876   if (!LAI->canVectorizeMemory())
877     return false;
878 
879   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
880     reportVectorizationFailure("Stores to a uniform address",
881         "write to a loop invariant address could not be vectorized",
882         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
883     return false;
884   }
885   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
886   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
887 
888   return true;
889 }
890 
891 bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
892   Value *In0 = const_cast<Value *>(V);
893   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
894   if (!PN)
895     return false;
896 
897   return Inductions.count(PN);
898 }
899 
900 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
901   auto *Inst = dyn_cast<Instruction>(V);
902   return (Inst && InductionCastsToIgnore.count(Inst));
903 }
904 
905 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
906   return isInductionPhi(V) || isCastedInductionVariable(V);
907 }
908 
909 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
910   return FirstOrderRecurrences.count(Phi);
911 }
912 
913 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
914   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
915 }
916 
917 bool LoopVectorizationLegality::blockCanBePredicated(
918     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
919   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
920 
921   for (Instruction &I : *BB) {
922     // Check that we don't have a constant expression that can trap as operand.
923     for (Value *Operand : I.operands()) {
924       if (auto *C = dyn_cast<Constant>(Operand))
925         if (C->canTrap())
926           return false;
927     }
928 
929     // We can predicate blocks with calls to assume, as long as we drop them in
930     // case we flatten the CFG via predication.
931     if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
932       ConditionalAssumes.insert(&I);
933       continue;
934     }
935 
936     // We might be able to hoist the load.
937     if (I.mayReadFromMemory()) {
938       auto *LI = dyn_cast<LoadInst>(&I);
939       if (!LI)
940         return false;
941       if (!SafePtrs.count(LI->getPointerOperand())) {
942         // !llvm.mem.parallel_loop_access implies if-conversion safety.
943         // Otherwise, record that the load needs (real or emulated) masking
944         // and let the cost model decide.
945         if (!IsAnnotatedParallel || PreserveGuards)
946           MaskedOp.insert(LI);
947         continue;
948       }
949     }
950 
951     if (I.mayWriteToMemory()) {
952       auto *SI = dyn_cast<StoreInst>(&I);
953       if (!SI)
954         return false;
955       // Predicated store requires some form of masking:
956       // 1) masked store HW instruction,
957       // 2) emulation via load-blend-store (only if safe and legal to do so,
958       //    be aware on the race conditions), or
959       // 3) element-by-element predicate check and scalar store.
960       MaskedOp.insert(SI);
961       continue;
962     }
963     if (I.mayThrow())
964       return false;
965   }
966 
967   return true;
968 }
969 
970 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
971   if (!EnableIfConversion) {
972     reportVectorizationFailure("If-conversion is disabled",
973                                "if-conversion is disabled",
974                                "IfConversionDisabled",
975                                ORE, TheLoop);
976     return false;
977   }
978 
979   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
980 
981   // A list of pointers which are known to be dereferenceable within scope of
982   // the loop body for each iteration of the loop which executes.  That is,
983   // the memory pointed to can be dereferenced (with the access size implied by
984   // the value's type) unconditionally within the loop header without
985   // introducing a new fault.
986   SmallPtrSet<Value *, 8> SafePointes;
987 
988   // Collect safe addresses.
989   for (BasicBlock *BB : TheLoop->blocks()) {
990     if (!blockNeedsPredication(BB)) {
991       for (Instruction &I : *BB)
992         if (auto *Ptr = getLoadStorePointerOperand(&I))
993           SafePointes.insert(Ptr);
994       continue;
995     }
996 
997     // For a block which requires predication, a address may be safe to access
998     // in the loop w/o predication if we can prove dereferenceability facts
999     // sufficient to ensure it'll never fault within the loop. For the moment,
1000     // we restrict this to loads; stores are more complicated due to
1001     // concurrency restrictions.
1002     ScalarEvolution &SE = *PSE.getSE();
1003     for (Instruction &I : *BB) {
1004       LoadInst *LI = dyn_cast<LoadInst>(&I);
1005       if (LI && !mustSuppressSpeculation(*LI) &&
1006           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
1007         SafePointes.insert(LI->getPointerOperand());
1008     }
1009   }
1010 
1011   // Collect the blocks that need predication.
1012   BasicBlock *Header = TheLoop->getHeader();
1013   for (BasicBlock *BB : TheLoop->blocks()) {
1014     // We don't support switch statements inside loops.
1015     if (!isa<BranchInst>(BB->getTerminator())) {
1016       reportVectorizationFailure("Loop contains a switch statement",
1017                                  "loop contains a switch statement",
1018                                  "LoopContainsSwitch", ORE, TheLoop,
1019                                  BB->getTerminator());
1020       return false;
1021     }
1022 
1023     // We must be able to predicate all blocks that need to be predicated.
1024     if (blockNeedsPredication(BB)) {
1025       if (!blockCanBePredicated(BB, SafePointes)) {
1026         reportVectorizationFailure(
1027             "Control flow cannot be substituted for a select",
1028             "control flow cannot be substituted for a select",
1029             "NoCFGForSelect", ORE, TheLoop,
1030             BB->getTerminator());
1031         return false;
1032       }
1033     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
1034       reportVectorizationFailure(
1035           "Control flow cannot be substituted for a select",
1036           "control flow cannot be substituted for a select",
1037           "NoCFGForSelect", ORE, TheLoop,
1038           BB->getTerminator());
1039       return false;
1040     }
1041   }
1042 
1043   // We can if-convert this loop.
1044   return true;
1045 }
1046 
1047 // Helper function to canVectorizeLoopNestCFG.
1048 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1049                                                     bool UseVPlanNativePath) {
1050   assert((UseVPlanNativePath || Lp->empty()) &&
1051          "VPlan-native path is not enabled.");
1052 
1053   // TODO: ORE should be improved to show more accurate information when an
1054   // outer loop can't be vectorized because a nested loop is not understood or
1055   // legal. Something like: "outer_loop_location: loop not vectorized:
1056   // (inner_loop_location) loop control flow is not understood by vectorizer".
1057 
1058   // Store the result and return it at the end instead of exiting early, in case
1059   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1060   bool Result = true;
1061   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1062 
1063   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1064   // be canonicalized.
1065   if (!Lp->getLoopPreheader()) {
1066     reportVectorizationFailure("Loop doesn't have a legal pre-header",
1067         "loop control flow is not understood by vectorizer",
1068         "CFGNotUnderstood", ORE, TheLoop);
1069     if (DoExtraAnalysis)
1070       Result = false;
1071     else
1072       return false;
1073   }
1074 
1075   // We must have a single backedge.
1076   if (Lp->getNumBackEdges() != 1) {
1077     reportVectorizationFailure("The loop must have a single backedge",
1078         "loop control flow is not understood by vectorizer",
1079         "CFGNotUnderstood", ORE, TheLoop);
1080     if (DoExtraAnalysis)
1081       Result = false;
1082     else
1083       return false;
1084   }
1085 
1086   // We must have a single exiting block.
1087   if (!Lp->getExitingBlock()) {
1088     reportVectorizationFailure("The loop must have an exiting block",
1089         "loop control flow is not understood by vectorizer",
1090         "CFGNotUnderstood", ORE, TheLoop);
1091     if (DoExtraAnalysis)
1092       Result = false;
1093     else
1094       return false;
1095   }
1096 
1097   // We only handle bottom-tested loops, i.e. loop in which the condition is
1098   // checked at the end of each iteration. With that we can assume that all
1099   // instructions in the loop are executed the same number of times.
1100   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1101     reportVectorizationFailure("The exiting block is not the loop latch",
1102         "loop control flow is not understood by vectorizer",
1103         "CFGNotUnderstood", ORE, TheLoop);
1104     if (DoExtraAnalysis)
1105       Result = false;
1106     else
1107       return false;
1108   }
1109 
1110   return Result;
1111 }
1112 
1113 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1114     Loop *Lp, bool UseVPlanNativePath) {
1115   // Store the result and return it at the end instead of exiting early, in case
1116   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1117   bool Result = true;
1118   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1119   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1120     if (DoExtraAnalysis)
1121       Result = false;
1122     else
1123       return false;
1124   }
1125 
1126   // Recursively check whether the loop control flow of nested loops is
1127   // understood.
1128   for (Loop *SubLp : *Lp)
1129     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1130       if (DoExtraAnalysis)
1131         Result = false;
1132       else
1133         return false;
1134     }
1135 
1136   return Result;
1137 }
1138 
1139 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1140   // Store the result and return it at the end instead of exiting early, in case
1141   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1142   bool Result = true;
1143 
1144   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1145   // Check whether the loop-related control flow in the loop nest is expected by
1146   // vectorizer.
1147   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1148     if (DoExtraAnalysis)
1149       Result = false;
1150     else
1151       return false;
1152   }
1153 
1154   // We need to have a loop header.
1155   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1156                     << '\n');
1157 
1158   // Specific checks for outer loops. We skip the remaining legal checks at this
1159   // point because they don't support outer loops.
1160   if (!TheLoop->empty()) {
1161     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1162 
1163     if (!canVectorizeOuterLoop()) {
1164       reportVectorizationFailure("Unsupported outer loop",
1165                                  "unsupported outer loop",
1166                                  "UnsupportedOuterLoop",
1167                                  ORE, TheLoop);
1168       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1169       // outer loops.
1170       return false;
1171     }
1172 
1173     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1174     return Result;
1175   }
1176 
1177   assert(TheLoop->empty() && "Inner loop expected.");
1178   // Check if we can if-convert non-single-bb loops.
1179   unsigned NumBlocks = TheLoop->getNumBlocks();
1180   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1181     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1182     if (DoExtraAnalysis)
1183       Result = false;
1184     else
1185       return false;
1186   }
1187 
1188   // Check if we can vectorize the instructions and CFG in this loop.
1189   if (!canVectorizeInstrs()) {
1190     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1191     if (DoExtraAnalysis)
1192       Result = false;
1193     else
1194       return false;
1195   }
1196 
1197   // Go over each instruction and look at memory deps.
1198   if (!canVectorizeMemory()) {
1199     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1200     if (DoExtraAnalysis)
1201       Result = false;
1202     else
1203       return false;
1204   }
1205 
1206   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1207                     << (LAI->getRuntimePointerChecking()->Need
1208                             ? " (with a runtime bound check)"
1209                             : "")
1210                     << "!\n");
1211 
1212   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1213   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1214     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1215 
1216   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1217     reportVectorizationFailure("Too many SCEV checks needed",
1218         "Too many SCEV assumptions need to be made and checked at runtime",
1219         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1220     if (DoExtraAnalysis)
1221       Result = false;
1222     else
1223       return false;
1224   }
1225 
1226   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1227   // we can vectorize, and at this point we don't have any other mem analysis
1228   // which may limit our maximum vectorization factor, so just return true with
1229   // no restrictions.
1230   return Result;
1231 }
1232 
1233 bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1234 
1235   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1236 
1237   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1238 
1239   for (auto &Reduction : getReductionVars())
1240     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1241 
1242   // TODO: handle non-reduction outside users when tail is folded by masking.
1243   for (auto *AE : AllowedExit) {
1244     // Check that all users of allowed exit values are inside the loop or
1245     // are the live-out of a reduction.
1246     if (ReductionLiveOuts.count(AE))
1247       continue;
1248     for (User *U : AE->users()) {
1249       Instruction *UI = cast<Instruction>(U);
1250       if (TheLoop->contains(UI))
1251         continue;
1252       reportVectorizationFailure(
1253           "Cannot fold tail by masking, loop has an outside user for",
1254           "Cannot fold tail by masking in the presence of live outs.",
1255           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1256       return false;
1257     }
1258   }
1259 
1260   // The list of pointers that we can safely read and write to remains empty.
1261   SmallPtrSet<Value *, 8> SafePointers;
1262 
1263   // Check and mark all blocks for predication, including those that ordinarily
1264   // do not need predication such as the header block.
1265   for (BasicBlock *BB : TheLoop->blocks()) {
1266     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1267       reportVectorizationFailure(
1268           "Cannot fold tail by masking as required",
1269           "control flow cannot be substituted for a select",
1270           "NoCFGForSelect", ORE, TheLoop,
1271           BB->getTerminator());
1272       return false;
1273     }
1274   }
1275 
1276   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1277   return true;
1278 }
1279 
1280 } // namespace llvm
1281