1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
16 
17 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
18 #include "llvm/Analysis/Loads.h"
19 #include "llvm/Analysis/LoopInfo.h"
20 #include "llvm/Analysis/TargetLibraryInfo.h"
21 #include "llvm/Analysis/ValueTracking.h"
22 #include "llvm/Analysis/VectorUtils.h"
23 #include "llvm/IR/IntrinsicInst.h"
24 #include "llvm/IR/PatternMatch.h"
25 #include "llvm/Transforms/Utils/SizeOpts.h"
26 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
27 
28 using namespace llvm;
29 using namespace PatternMatch;
30 
31 #define LV_NAME "loop-vectorize"
32 #define DEBUG_TYPE LV_NAME
33 
34 extern cl::opt<bool> EnableVPlanPredication;
35 
36 static cl::opt<bool>
37     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
38                        cl::desc("Enable if-conversion during vectorization."));
39 
40 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
41     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
42     cl::desc("The maximum allowed number of runtime memory checks with a "
43              "vectorize(enable) pragma."));
44 
45 static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
46     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
47     cl::desc("The maximum number of SCEV checks allowed."));
48 
49 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
50     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
51     cl::desc("The maximum number of SCEV checks allowed with a "
52              "vectorize(enable) pragma"));
53 
54 /// Maximum vectorization interleave count.
55 static const unsigned MaxInterleaveFactor = 16;
56 
57 namespace llvm {
58 
59 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
60   switch (Kind) {
61   case HK_WIDTH:
62     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
63   case HK_UNROLL:
64     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
65   case HK_FORCE:
66     return (Val <= 1);
67   case HK_ISVECTORIZED:
68   case HK_PREDICATE:
69     return (Val == 0 || Val == 1);
70   }
71   return false;
72 }
73 
74 LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
75                                        bool InterleaveOnlyWhenForced,
76                                        OptimizationRemarkEmitter &ORE)
77     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
78       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
79       Force("vectorize.enable", FK_Undefined, HK_FORCE),
80       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
81       Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
82       ORE(ORE) {
83   // Populate values with existing loop metadata.
84   getHintsFromMetadata();
85 
86   // force-vector-interleave overrides DisableInterleaving.
87   if (VectorizerParams::isInterleaveForced())
88     Interleave.Value = VectorizerParams::VectorizationInterleave;
89 
90   if (IsVectorized.Value != 1)
91     // If the vectorization width and interleaving count are both 1 then
92     // consider the loop to have been already vectorized because there's
93     // nothing more that we can do.
94     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
95   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
96              << "LV: Interleaving disabled by the pass manager\n");
97 }
98 
99 void LoopVectorizeHints::setAlreadyVectorized() {
100   LLVMContext &Context = TheLoop->getHeader()->getContext();
101 
102   MDNode *IsVectorizedMD = MDNode::get(
103       Context,
104       {MDString::get(Context, "llvm.loop.isvectorized"),
105        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
106   MDNode *LoopID = TheLoop->getLoopID();
107   MDNode *NewLoopID =
108       makePostTransformationMetadata(Context, LoopID,
109                                      {Twine(Prefix(), "vectorize.").str(),
110                                       Twine(Prefix(), "interleave.").str()},
111                                      {IsVectorizedMD});
112   TheLoop->setLoopID(NewLoopID);
113 
114   // Update internal cache.
115   IsVectorized.Value = 1;
116 }
117 
118 bool LoopVectorizeHints::allowVectorization(
119     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
120   if (getForce() == LoopVectorizeHints::FK_Disabled) {
121     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
122     emitRemarkWithHints();
123     return false;
124   }
125 
126   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
127     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
128     emitRemarkWithHints();
129     return false;
130   }
131 
132   if (getIsVectorized() == 1) {
133     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
134     // FIXME: Add interleave.disable metadata. This will allow
135     // vectorize.disable to be used without disabling the pass and errors
136     // to differentiate between disabled vectorization and a width of 1.
137     ORE.emit([&]() {
138       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
139                                         "AllDisabled", L->getStartLoc(),
140                                         L->getHeader())
141              << "loop not vectorized: vectorization and interleaving are "
142                 "explicitly disabled, or the loop has already been "
143                 "vectorized";
144     });
145     return false;
146   }
147 
148   return true;
149 }
150 
151 void LoopVectorizeHints::emitRemarkWithHints() const {
152   using namespace ore;
153 
154   ORE.emit([&]() {
155     if (Force.Value == LoopVectorizeHints::FK_Disabled)
156       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
157                                       TheLoop->getStartLoc(),
158                                       TheLoop->getHeader())
159              << "loop not vectorized: vectorization is explicitly disabled";
160     else {
161       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
162                                  TheLoop->getStartLoc(), TheLoop->getHeader());
163       R << "loop not vectorized";
164       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
165         R << " (Force=" << NV("Force", true);
166         if (Width.Value != 0)
167           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
168         if (Interleave.Value != 0)
169           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
170         R << ")";
171       }
172       return R;
173     }
174   });
175 }
176 
177 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
178   if (getWidth() == 1)
179     return LV_NAME;
180   if (getForce() == LoopVectorizeHints::FK_Disabled)
181     return LV_NAME;
182   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
183     return LV_NAME;
184   return OptimizationRemarkAnalysis::AlwaysPrint;
185 }
186 
187 void LoopVectorizeHints::getHintsFromMetadata() {
188   MDNode *LoopID = TheLoop->getLoopID();
189   if (!LoopID)
190     return;
191 
192   // First operand should refer to the loop id itself.
193   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
194   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
195 
196   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
197     const MDString *S = nullptr;
198     SmallVector<Metadata *, 4> Args;
199 
200     // The expected hint is either a MDString or a MDNode with the first
201     // operand a MDString.
202     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
203       if (!MD || MD->getNumOperands() == 0)
204         continue;
205       S = dyn_cast<MDString>(MD->getOperand(0));
206       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
207         Args.push_back(MD->getOperand(i));
208     } else {
209       S = dyn_cast<MDString>(LoopID->getOperand(i));
210       assert(Args.size() == 0 && "too many arguments for MDString");
211     }
212 
213     if (!S)
214       continue;
215 
216     // Check if the hint starts with the loop metadata prefix.
217     StringRef Name = S->getString();
218     if (Args.size() == 1)
219       setHint(Name, Args[0]);
220   }
221 }
222 
223 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
224   if (!Name.startswith(Prefix()))
225     return;
226   Name = Name.substr(Prefix().size(), StringRef::npos);
227 
228   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
229   if (!C)
230     return;
231   unsigned Val = C->getZExtValue();
232 
233   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
234   for (auto H : Hints) {
235     if (Name == H->Name) {
236       if (H->validate(Val))
237         H->Value = Val;
238       else
239         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
240       break;
241     }
242   }
243 }
244 
245 bool LoopVectorizationRequirements::doesNotMeet(
246     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
247   const char *PassName = Hints.vectorizeAnalysisPassName();
248   bool Failed = false;
249   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
250     ORE.emit([&]() {
251       return OptimizationRemarkAnalysisFPCommute(
252                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
253                  UnsafeAlgebraInst->getParent())
254              << "loop not vectorized: cannot prove it is safe to reorder "
255                 "floating-point operations";
256     });
257     Failed = true;
258   }
259 
260   // Test if runtime memcheck thresholds are exceeded.
261   bool PragmaThresholdReached =
262       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
263   bool ThresholdReached =
264       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
265   if ((ThresholdReached && !Hints.allowReordering()) ||
266       PragmaThresholdReached) {
267     ORE.emit([&]() {
268       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
269                                                 L->getStartLoc(),
270                                                 L->getHeader())
271              << "loop not vectorized: cannot prove it is safe to reorder "
272                 "memory operations";
273     });
274     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
275     Failed = true;
276   }
277 
278   return Failed;
279 }
280 
281 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
282 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
283 // executing the inner loop will execute the same iterations). This check is
284 // very constrained for now but it will be relaxed in the future. \p Lp is
285 // considered uniform if it meets all the following conditions:
286 //   1) it has a canonical IV (starting from 0 and with stride 1),
287 //   2) its latch terminator is a conditional branch and,
288 //   3) its latch condition is a compare instruction whose operands are the
289 //      canonical IV and an OuterLp invariant.
290 // This check doesn't take into account the uniformity of other conditions not
291 // related to the loop latch because they don't affect the loop uniformity.
292 //
293 // NOTE: We decided to keep all these checks and its associated documentation
294 // together so that we can easily have a picture of the current supported loop
295 // nests. However, some of the current checks don't depend on \p OuterLp and
296 // would be redundantly executed for each \p Lp if we invoked this function for
297 // different candidate outer loops. This is not the case for now because we
298 // don't currently have the infrastructure to evaluate multiple candidate outer
299 // loops and \p OuterLp will be a fixed parameter while we only support explicit
300 // outer loop vectorization. It's also very likely that these checks go away
301 // before introducing the aforementioned infrastructure. However, if this is not
302 // the case, we should move the \p OuterLp independent checks to a separate
303 // function that is only executed once for each \p Lp.
304 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
305   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
306 
307   // If Lp is the outer loop, it's uniform by definition.
308   if (Lp == OuterLp)
309     return true;
310   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
311 
312   // 1.
313   PHINode *IV = Lp->getCanonicalInductionVariable();
314   if (!IV) {
315     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
316     return false;
317   }
318 
319   // 2.
320   BasicBlock *Latch = Lp->getLoopLatch();
321   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
322   if (!LatchBr || LatchBr->isUnconditional()) {
323     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
324     return false;
325   }
326 
327   // 3.
328   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
329   if (!LatchCmp) {
330     LLVM_DEBUG(
331         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
332     return false;
333   }
334 
335   Value *CondOp0 = LatchCmp->getOperand(0);
336   Value *CondOp1 = LatchCmp->getOperand(1);
337   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
338   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
339       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
340     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
341     return false;
342   }
343 
344   return true;
345 }
346 
347 // Return true if \p Lp and all its nested loops are uniform with regard to \p
348 // OuterLp.
349 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
350   if (!isUniformLoop(Lp, OuterLp))
351     return false;
352 
353   // Check if nested loops are uniform.
354   for (Loop *SubLp : *Lp)
355     if (!isUniformLoopNest(SubLp, OuterLp))
356       return false;
357 
358   return true;
359 }
360 
361 /// Check whether it is safe to if-convert this phi node.
362 ///
363 /// Phi nodes with constant expressions that can trap are not safe to if
364 /// convert.
365 static bool canIfConvertPHINodes(BasicBlock *BB) {
366   for (PHINode &Phi : BB->phis()) {
367     for (Value *V : Phi.incoming_values())
368       if (auto *C = dyn_cast<Constant>(V))
369         if (C->canTrap())
370           return false;
371   }
372   return true;
373 }
374 
375 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
376   if (Ty->isPointerTy())
377     return DL.getIntPtrType(Ty);
378 
379   // It is possible that char's or short's overflow when we ask for the loop's
380   // trip count, work around this by changing the type size.
381   if (Ty->getScalarSizeInBits() < 32)
382     return Type::getInt32Ty(Ty->getContext());
383 
384   return Ty;
385 }
386 
387 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
388   Ty0 = convertPointerToIntegerType(DL, Ty0);
389   Ty1 = convertPointerToIntegerType(DL, Ty1);
390   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
391     return Ty0;
392   return Ty1;
393 }
394 
395 /// Check that the instruction has outside loop users and is not an
396 /// identified reduction variable.
397 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
398                                SmallPtrSetImpl<Value *> &AllowedExit) {
399   // Reductions, Inductions and non-header phis are allowed to have exit users. All
400   // other instructions must not have external users.
401   if (!AllowedExit.count(Inst))
402     // Check that all of the users of the loop are inside the BB.
403     for (User *U : Inst->users()) {
404       Instruction *UI = cast<Instruction>(U);
405       // This user may be a reduction exit value.
406       if (!TheLoop->contains(UI)) {
407         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
408         return true;
409       }
410     }
411   return false;
412 }
413 
414 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
415   const ValueToValueMap &Strides =
416       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
417 
418   Function *F = TheLoop->getHeader()->getParent();
419   bool OptForSize = F->hasOptSize() ||
420                     llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
421                                                 PGSOQueryType::IRPass);
422   bool CanAddPredicate = !OptForSize;
423   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
424   if (Stride == 1 || Stride == -1)
425     return Stride;
426   return 0;
427 }
428 
429 bool LoopVectorizationLegality::isUniform(Value *V) {
430   return LAI->isUniform(V);
431 }
432 
433 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
434   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
435   // Store the result and return it at the end instead of exiting early, in case
436   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
437   bool Result = true;
438   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
439 
440   for (BasicBlock *BB : TheLoop->blocks()) {
441     // Check whether the BB terminator is a BranchInst. Any other terminator is
442     // not supported yet.
443     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
444     if (!Br) {
445       reportVectorizationFailure("Unsupported basic block terminator",
446           "loop control flow is not understood by vectorizer",
447           "CFGNotUnderstood", ORE, TheLoop);
448       if (DoExtraAnalysis)
449         Result = false;
450       else
451         return false;
452     }
453 
454     // Check whether the BranchInst is a supported one. Only unconditional
455     // branches, conditional branches with an outer loop invariant condition or
456     // backedges are supported.
457     // FIXME: We skip these checks when VPlan predication is enabled as we
458     // want to allow divergent branches. This whole check will be removed
459     // once VPlan predication is on by default.
460     if (!EnableVPlanPredication && Br && Br->isConditional() &&
461         !TheLoop->isLoopInvariant(Br->getCondition()) &&
462         !LI->isLoopHeader(Br->getSuccessor(0)) &&
463         !LI->isLoopHeader(Br->getSuccessor(1))) {
464       reportVectorizationFailure("Unsupported conditional branch",
465           "loop control flow is not understood by vectorizer",
466           "CFGNotUnderstood", ORE, TheLoop);
467       if (DoExtraAnalysis)
468         Result = false;
469       else
470         return false;
471     }
472   }
473 
474   // Check whether inner loops are uniform. At this point, we only support
475   // simple outer loops scenarios with uniform nested loops.
476   if (!isUniformLoopNest(TheLoop /*loop nest*/,
477                          TheLoop /*context outer loop*/)) {
478     reportVectorizationFailure("Outer loop contains divergent loops",
479         "loop control flow is not understood by vectorizer",
480         "CFGNotUnderstood", ORE, TheLoop);
481     if (DoExtraAnalysis)
482       Result = false;
483     else
484       return false;
485   }
486 
487   // Check whether we are able to set up outer loop induction.
488   if (!setupOuterLoopInductions()) {
489     reportVectorizationFailure("Unsupported outer loop Phi(s)",
490                                "Unsupported outer loop Phi(s)",
491                                "UnsupportedPhi", ORE, TheLoop);
492     if (DoExtraAnalysis)
493       Result = false;
494     else
495       return false;
496   }
497 
498   return Result;
499 }
500 
501 void LoopVectorizationLegality::addInductionPhi(
502     PHINode *Phi, const InductionDescriptor &ID,
503     SmallPtrSetImpl<Value *> &AllowedExit) {
504   Inductions[Phi] = ID;
505 
506   // In case this induction also comes with casts that we know we can ignore
507   // in the vectorized loop body, record them here. All casts could be recorded
508   // here for ignoring, but suffices to record only the first (as it is the
509   // only one that may bw used outside the cast sequence).
510   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
511   if (!Casts.empty())
512     InductionCastsToIgnore.insert(*Casts.begin());
513 
514   Type *PhiTy = Phi->getType();
515   const DataLayout &DL = Phi->getModule()->getDataLayout();
516 
517   // Get the widest type.
518   if (!PhiTy->isFloatingPointTy()) {
519     if (!WidestIndTy)
520       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
521     else
522       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
523   }
524 
525   // Int inductions are special because we only allow one IV.
526   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
527       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
528       isa<Constant>(ID.getStartValue()) &&
529       cast<Constant>(ID.getStartValue())->isNullValue()) {
530 
531     // Use the phi node with the widest type as induction. Use the last
532     // one if there are multiple (no good reason for doing this other
533     // than it is expedient). We've checked that it begins at zero and
534     // steps by one, so this is a canonical induction variable.
535     if (!PrimaryInduction || PhiTy == WidestIndTy)
536       PrimaryInduction = Phi;
537   }
538 
539   // Both the PHI node itself, and the "post-increment" value feeding
540   // back into the PHI node may have external users.
541   // We can allow those uses, except if the SCEVs we have for them rely
542   // on predicates that only hold within the loop, since allowing the exit
543   // currently means re-using this SCEV outside the loop (see PR33706 for more
544   // details).
545   if (PSE.getUnionPredicate().isAlwaysTrue()) {
546     AllowedExit.insert(Phi);
547     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
548   }
549 
550   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
551 }
552 
553 bool LoopVectorizationLegality::setupOuterLoopInductions() {
554   BasicBlock *Header = TheLoop->getHeader();
555 
556   // Returns true if a given Phi is a supported induction.
557   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
558     InductionDescriptor ID;
559     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
560         ID.getKind() == InductionDescriptor::IK_IntInduction) {
561       addInductionPhi(&Phi, ID, AllowedExit);
562       return true;
563     } else {
564       // Bail out for any Phi in the outer loop header that is not a supported
565       // induction.
566       LLVM_DEBUG(
567           dbgs()
568           << "LV: Found unsupported PHI for outer loop vectorization.\n");
569       return false;
570     }
571   };
572 
573   if (llvm::all_of(Header->phis(), isSupportedPhi))
574     return true;
575   else
576     return false;
577 }
578 
579 /// Checks if a function is scalarizable according to the TLI, in
580 /// the sense that it should be vectorized and then expanded in
581 /// multiple scalarcalls. This is represented in the
582 /// TLI via mappings that do not specify a vector name, as in the
583 /// following example:
584 ///
585 ///    const VecDesc VecIntrinsics[] = {
586 ///      {"llvm.phx.abs.i32", "", 4}
587 ///    };
588 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
589   const StringRef ScalarName = CI.getCalledFunction()->getName();
590   bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
591   // Check that all known VFs are not associated to a vector
592   // function, i.e. the vector name is emty.
593   if (Scalarize)
594     for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName);
595          VF <= WidestVF; VF *= 2) {
596       Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
597     }
598   return Scalarize;
599 }
600 
601 bool LoopVectorizationLegality::canVectorizeInstrs() {
602   BasicBlock *Header = TheLoop->getHeader();
603 
604   // Look for the attribute signaling the absence of NaNs.
605   Function &F = *Header->getParent();
606   HasFunNoNaNAttr =
607       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
608 
609   // For each block in the loop.
610   for (BasicBlock *BB : TheLoop->blocks()) {
611     // Scan the instructions in the block and look for hazards.
612     for (Instruction &I : *BB) {
613       if (auto *Phi = dyn_cast<PHINode>(&I)) {
614         Type *PhiTy = Phi->getType();
615         // Check that this PHI type is allowed.
616         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
617             !PhiTy->isPointerTy()) {
618           reportVectorizationFailure("Found a non-int non-pointer PHI",
619                                      "loop control flow is not understood by vectorizer",
620                                      "CFGNotUnderstood", ORE, TheLoop);
621           return false;
622         }
623 
624         // If this PHINode is not in the header block, then we know that we
625         // can convert it to select during if-conversion. No need to check if
626         // the PHIs in this block are induction or reduction variables.
627         if (BB != Header) {
628           // Non-header phi nodes that have outside uses can be vectorized. Add
629           // them to the list of allowed exits.
630           // Unsafe cyclic dependencies with header phis are identified during
631           // legalization for reduction, induction and first order
632           // recurrences.
633           AllowedExit.insert(&I);
634           continue;
635         }
636 
637         // We only allow if-converted PHIs with exactly two incoming values.
638         if (Phi->getNumIncomingValues() != 2) {
639           reportVectorizationFailure("Found an invalid PHI",
640               "loop control flow is not understood by vectorizer",
641               "CFGNotUnderstood", ORE, TheLoop, Phi);
642           return false;
643         }
644 
645         RecurrenceDescriptor RedDes;
646         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
647                                                  DT)) {
648           if (RedDes.hasUnsafeAlgebra())
649             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
650           AllowedExit.insert(RedDes.getLoopExitInstr());
651           Reductions[Phi] = RedDes;
652           continue;
653         }
654 
655         // TODO: Instead of recording the AllowedExit, it would be good to record the
656         // complementary set: NotAllowedExit. These include (but may not be
657         // limited to):
658         // 1. Reduction phis as they represent the one-before-last value, which
659         // is not available when vectorized
660         // 2. Induction phis and increment when SCEV predicates cannot be used
661         // outside the loop - see addInductionPhi
662         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
663         // outside the loop - see call to hasOutsideLoopUser in the non-phi
664         // handling below
665         // 4. FirstOrderRecurrence phis that can possibly be handled by
666         // extraction.
667         // By recording these, we can then reason about ways to vectorize each
668         // of these NotAllowedExit.
669         InductionDescriptor ID;
670         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
671           addInductionPhi(Phi, ID, AllowedExit);
672           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
673             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
674           continue;
675         }
676 
677         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
678                                                          SinkAfter, DT)) {
679           AllowedExit.insert(Phi);
680           FirstOrderRecurrences.insert(Phi);
681           continue;
682         }
683 
684         // As a last resort, coerce the PHI to a AddRec expression
685         // and re-try classifying it a an induction PHI.
686         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
687           addInductionPhi(Phi, ID, AllowedExit);
688           continue;
689         }
690 
691         reportVectorizationFailure("Found an unidentified PHI",
692             "value that could not be identified as "
693             "reduction is used outside the loop",
694             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
695         return false;
696       } // end of PHI handling
697 
698       // We handle calls that:
699       //   * Are debug info intrinsics.
700       //   * Have a mapping to an IR intrinsic.
701       //   * Have a vector version available.
702       auto *CI = dyn_cast<CallInst>(&I);
703 
704       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
705           !isa<DbgInfoIntrinsic>(CI) &&
706           !(CI->getCalledFunction() && TLI &&
707             (!VFDatabase::getMappings(*CI).empty() ||
708              isTLIScalarize(*TLI, *CI)))) {
709         // If the call is a recognized math libary call, it is likely that
710         // we can vectorize it given loosened floating-point constraints.
711         LibFunc Func;
712         bool IsMathLibCall =
713             TLI && CI->getCalledFunction() &&
714             CI->getType()->isFloatingPointTy() &&
715             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
716             TLI->hasOptimizedCodeGen(Func);
717 
718         if (IsMathLibCall) {
719           // TODO: Ideally, we should not use clang-specific language here,
720           // but it's hard to provide meaningful yet generic advice.
721           // Also, should this be guarded by allowExtraAnalysis() and/or be part
722           // of the returned info from isFunctionVectorizable()?
723           reportVectorizationFailure(
724               "Found a non-intrinsic callsite",
725               "library call cannot be vectorized. "
726               "Try compiling with -fno-math-errno, -ffast-math, "
727               "or similar flags",
728               "CantVectorizeLibcall", ORE, TheLoop, CI);
729         } else {
730           reportVectorizationFailure("Found a non-intrinsic callsite",
731                                      "call instruction cannot be vectorized",
732                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
733         }
734         return false;
735       }
736 
737       // Some intrinsics have scalar arguments and should be same in order for
738       // them to be vectorized (i.e. loop invariant).
739       if (CI) {
740         auto *SE = PSE.getSE();
741         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
742         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
743           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
744             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
745               reportVectorizationFailure("Found unvectorizable intrinsic",
746                   "intrinsic instruction cannot be vectorized",
747                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
748               return false;
749             }
750           }
751       }
752 
753       // Check that the instruction return type is vectorizable.
754       // Also, we can't vectorize extractelement instructions.
755       if ((!VectorType::isValidElementType(I.getType()) &&
756            !I.getType()->isVoidTy()) ||
757           isa<ExtractElementInst>(I)) {
758         reportVectorizationFailure("Found unvectorizable type",
759             "instruction return type cannot be vectorized",
760             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
761         return false;
762       }
763 
764       // Check that the stored type is vectorizable.
765       if (auto *ST = dyn_cast<StoreInst>(&I)) {
766         Type *T = ST->getValueOperand()->getType();
767         if (!VectorType::isValidElementType(T)) {
768           reportVectorizationFailure("Store instruction cannot be vectorized",
769                                      "store instruction cannot be vectorized",
770                                      "CantVectorizeStore", ORE, TheLoop, ST);
771           return false;
772         }
773 
774         // For nontemporal stores, check that a nontemporal vector version is
775         // supported on the target.
776         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
777           // Arbitrarily try a vector of 2 elements.
778           auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2);
779           assert(VecTy && "did not find vectorized version of stored type");
780           if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
781             reportVectorizationFailure(
782                 "nontemporal store instruction cannot be vectorized",
783                 "nontemporal store instruction cannot be vectorized",
784                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
785             return false;
786           }
787         }
788 
789       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
790         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
791           // For nontemporal loads, check that a nontemporal vector version is
792           // supported on the target (arbitrarily try a vector of 2 elements).
793           auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2);
794           assert(VecTy && "did not find vectorized version of load type");
795           if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
796             reportVectorizationFailure(
797                 "nontemporal load instruction cannot be vectorized",
798                 "nontemporal load instruction cannot be vectorized",
799                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
800             return false;
801           }
802         }
803 
804         // FP instructions can allow unsafe algebra, thus vectorizable by
805         // non-IEEE-754 compliant SIMD units.
806         // This applies to floating-point math operations and calls, not memory
807         // operations, shuffles, or casts, as they don't change precision or
808         // semantics.
809       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
810                  !I.isFast()) {
811         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
812         Hints->setPotentiallyUnsafe();
813       }
814 
815       // Reduction instructions are allowed to have exit users.
816       // All other instructions must not have external users.
817       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
818         // We can safely vectorize loops where instructions within the loop are
819         // used outside the loop only if the SCEV predicates within the loop is
820         // same as outside the loop. Allowing the exit means reusing the SCEV
821         // outside the loop.
822         if (PSE.getUnionPredicate().isAlwaysTrue()) {
823           AllowedExit.insert(&I);
824           continue;
825         }
826         reportVectorizationFailure("Value cannot be used outside the loop",
827                                    "value cannot be used outside the loop",
828                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
829         return false;
830       }
831     } // next instr.
832   }
833 
834   if (!PrimaryInduction) {
835     if (Inductions.empty()) {
836       reportVectorizationFailure("Did not find one integer induction var",
837           "loop induction variable could not be identified",
838           "NoInductionVariable", ORE, TheLoop);
839       return false;
840     } else if (!WidestIndTy) {
841       reportVectorizationFailure("Did not find one integer induction var",
842           "integer loop induction variable could not be identified",
843           "NoIntegerInductionVariable", ORE, TheLoop);
844       return false;
845     } else {
846       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
847     }
848   }
849 
850   // For first order recurrences, we use the previous value (incoming value from
851   // the latch) to check if it dominates all users of the recurrence. Bail out
852   // if we have to sink such an instruction for another recurrence, as the
853   // dominance requirement may not hold after sinking.
854   BasicBlock *LoopLatch = TheLoop->getLoopLatch();
855   if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
856         Instruction *V =
857             cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
858         return SinkAfter.find(V) != SinkAfter.end();
859       }))
860     return false;
861 
862   // Now we know the widest induction type, check if our found induction
863   // is the same size. If it's not, unset it here and InnerLoopVectorizer
864   // will create another.
865   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
866     PrimaryInduction = nullptr;
867 
868   return true;
869 }
870 
871 bool LoopVectorizationLegality::canVectorizeMemory() {
872   LAI = &(*GetLAA)(*TheLoop);
873   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
874   if (LAR) {
875     ORE->emit([&]() {
876       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
877                                         "loop not vectorized: ", *LAR);
878     });
879   }
880   if (!LAI->canVectorizeMemory())
881     return false;
882 
883   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
884     reportVectorizationFailure("Stores to a uniform address",
885         "write to a loop invariant address could not be vectorized",
886         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
887     return false;
888   }
889   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
890   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
891 
892   return true;
893 }
894 
895 bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
896   Value *In0 = const_cast<Value *>(V);
897   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
898   if (!PN)
899     return false;
900 
901   return Inductions.count(PN);
902 }
903 
904 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
905   auto *Inst = dyn_cast<Instruction>(V);
906   return (Inst && InductionCastsToIgnore.count(Inst));
907 }
908 
909 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
910   return isInductionPhi(V) || isCastedInductionVariable(V);
911 }
912 
913 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
914   return FirstOrderRecurrences.count(Phi);
915 }
916 
917 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
918   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
919 }
920 
921 bool LoopVectorizationLegality::blockCanBePredicated(
922     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
923   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
924 
925   for (Instruction &I : *BB) {
926     // Check that we don't have a constant expression that can trap as operand.
927     for (Value *Operand : I.operands()) {
928       if (auto *C = dyn_cast<Constant>(Operand))
929         if (C->canTrap())
930           return false;
931     }
932 
933     // We can predicate blocks with calls to assume, as long as we drop them in
934     // case we flatten the CFG via predication.
935     if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
936       ConditionalAssumes.insert(&I);
937       continue;
938     }
939 
940     // We might be able to hoist the load.
941     if (I.mayReadFromMemory()) {
942       auto *LI = dyn_cast<LoadInst>(&I);
943       if (!LI)
944         return false;
945       if (!SafePtrs.count(LI->getPointerOperand())) {
946         // !llvm.mem.parallel_loop_access implies if-conversion safety.
947         // Otherwise, record that the load needs (real or emulated) masking
948         // and let the cost model decide.
949         if (!IsAnnotatedParallel || PreserveGuards)
950           MaskedOp.insert(LI);
951         continue;
952       }
953     }
954 
955     if (I.mayWriteToMemory()) {
956       auto *SI = dyn_cast<StoreInst>(&I);
957       if (!SI)
958         return false;
959       // Predicated store requires some form of masking:
960       // 1) masked store HW instruction,
961       // 2) emulation via load-blend-store (only if safe and legal to do so,
962       //    be aware on the race conditions), or
963       // 3) element-by-element predicate check and scalar store.
964       MaskedOp.insert(SI);
965       continue;
966     }
967     if (I.mayThrow())
968       return false;
969   }
970 
971   return true;
972 }
973 
974 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
975   if (!EnableIfConversion) {
976     reportVectorizationFailure("If-conversion is disabled",
977                                "if-conversion is disabled",
978                                "IfConversionDisabled",
979                                ORE, TheLoop);
980     return false;
981   }
982 
983   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
984 
985   // A list of pointers which are known to be dereferenceable within scope of
986   // the loop body for each iteration of the loop which executes.  That is,
987   // the memory pointed to can be dereferenced (with the access size implied by
988   // the value's type) unconditionally within the loop header without
989   // introducing a new fault.
990   SmallPtrSet<Value *, 8> SafePointers;
991 
992   // Collect safe addresses.
993   for (BasicBlock *BB : TheLoop->blocks()) {
994     if (!blockNeedsPredication(BB)) {
995       for (Instruction &I : *BB)
996         if (auto *Ptr = getLoadStorePointerOperand(&I))
997           SafePointers.insert(Ptr);
998       continue;
999     }
1000 
1001     // For a block which requires predication, a address may be safe to access
1002     // in the loop w/o predication if we can prove dereferenceability facts
1003     // sufficient to ensure it'll never fault within the loop. For the moment,
1004     // we restrict this to loads; stores are more complicated due to
1005     // concurrency restrictions.
1006     ScalarEvolution &SE = *PSE.getSE();
1007     for (Instruction &I : *BB) {
1008       LoadInst *LI = dyn_cast<LoadInst>(&I);
1009       if (LI && !mustSuppressSpeculation(*LI) &&
1010           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
1011         SafePointers.insert(LI->getPointerOperand());
1012     }
1013   }
1014 
1015   // Collect the blocks that need predication.
1016   BasicBlock *Header = TheLoop->getHeader();
1017   for (BasicBlock *BB : TheLoop->blocks()) {
1018     // We don't support switch statements inside loops.
1019     if (!isa<BranchInst>(BB->getTerminator())) {
1020       reportVectorizationFailure("Loop contains a switch statement",
1021                                  "loop contains a switch statement",
1022                                  "LoopContainsSwitch", ORE, TheLoop,
1023                                  BB->getTerminator());
1024       return false;
1025     }
1026 
1027     // We must be able to predicate all blocks that need to be predicated.
1028     if (blockNeedsPredication(BB)) {
1029       if (!blockCanBePredicated(BB, SafePointers)) {
1030         reportVectorizationFailure(
1031             "Control flow cannot be substituted for a select",
1032             "control flow cannot be substituted for a select",
1033             "NoCFGForSelect", ORE, TheLoop,
1034             BB->getTerminator());
1035         return false;
1036       }
1037     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
1038       reportVectorizationFailure(
1039           "Control flow cannot be substituted for a select",
1040           "control flow cannot be substituted for a select",
1041           "NoCFGForSelect", ORE, TheLoop,
1042           BB->getTerminator());
1043       return false;
1044     }
1045   }
1046 
1047   // We can if-convert this loop.
1048   return true;
1049 }
1050 
1051 // Helper function to canVectorizeLoopNestCFG.
1052 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1053                                                     bool UseVPlanNativePath) {
1054   assert((UseVPlanNativePath || Lp->empty()) &&
1055          "VPlan-native path is not enabled.");
1056 
1057   // TODO: ORE should be improved to show more accurate information when an
1058   // outer loop can't be vectorized because a nested loop is not understood or
1059   // legal. Something like: "outer_loop_location: loop not vectorized:
1060   // (inner_loop_location) loop control flow is not understood by vectorizer".
1061 
1062   // Store the result and return it at the end instead of exiting early, in case
1063   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1064   bool Result = true;
1065   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1066 
1067   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1068   // be canonicalized.
1069   if (!Lp->getLoopPreheader()) {
1070     reportVectorizationFailure("Loop doesn't have a legal pre-header",
1071         "loop control flow is not understood by vectorizer",
1072         "CFGNotUnderstood", ORE, TheLoop);
1073     if (DoExtraAnalysis)
1074       Result = false;
1075     else
1076       return false;
1077   }
1078 
1079   // We must have a single backedge.
1080   if (Lp->getNumBackEdges() != 1) {
1081     reportVectorizationFailure("The loop must have a single backedge",
1082         "loop control flow is not understood by vectorizer",
1083         "CFGNotUnderstood", ORE, TheLoop);
1084     if (DoExtraAnalysis)
1085       Result = false;
1086     else
1087       return false;
1088   }
1089 
1090   // We must have a single exiting block.
1091   if (!Lp->getExitingBlock()) {
1092     reportVectorizationFailure("The loop must have an exiting block",
1093         "loop control flow is not understood by vectorizer",
1094         "CFGNotUnderstood", ORE, TheLoop);
1095     if (DoExtraAnalysis)
1096       Result = false;
1097     else
1098       return false;
1099   }
1100 
1101   // We only handle bottom-tested loops, i.e. loop in which the condition is
1102   // checked at the end of each iteration. With that we can assume that all
1103   // instructions in the loop are executed the same number of times.
1104   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1105     reportVectorizationFailure("The exiting block is not the loop latch",
1106         "loop control flow is not understood by vectorizer",
1107         "CFGNotUnderstood", ORE, TheLoop);
1108     if (DoExtraAnalysis)
1109       Result = false;
1110     else
1111       return false;
1112   }
1113 
1114   return Result;
1115 }
1116 
1117 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1118     Loop *Lp, bool UseVPlanNativePath) {
1119   // Store the result and return it at the end instead of exiting early, in case
1120   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1121   bool Result = true;
1122   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1123   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1124     if (DoExtraAnalysis)
1125       Result = false;
1126     else
1127       return false;
1128   }
1129 
1130   // Recursively check whether the loop control flow of nested loops is
1131   // understood.
1132   for (Loop *SubLp : *Lp)
1133     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1134       if (DoExtraAnalysis)
1135         Result = false;
1136       else
1137         return false;
1138     }
1139 
1140   return Result;
1141 }
1142 
1143 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1144   // Store the result and return it at the end instead of exiting early, in case
1145   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1146   bool Result = true;
1147 
1148   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1149   // Check whether the loop-related control flow in the loop nest is expected by
1150   // vectorizer.
1151   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1152     if (DoExtraAnalysis)
1153       Result = false;
1154     else
1155       return false;
1156   }
1157 
1158   // We need to have a loop header.
1159   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1160                     << '\n');
1161 
1162   // Specific checks for outer loops. We skip the remaining legal checks at this
1163   // point because they don't support outer loops.
1164   if (!TheLoop->empty()) {
1165     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1166 
1167     if (!canVectorizeOuterLoop()) {
1168       reportVectorizationFailure("Unsupported outer loop",
1169                                  "unsupported outer loop",
1170                                  "UnsupportedOuterLoop",
1171                                  ORE, TheLoop);
1172       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1173       // outer loops.
1174       return false;
1175     }
1176 
1177     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1178     return Result;
1179   }
1180 
1181   assert(TheLoop->empty() && "Inner loop expected.");
1182   // Check if we can if-convert non-single-bb loops.
1183   unsigned NumBlocks = TheLoop->getNumBlocks();
1184   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1185     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1186     if (DoExtraAnalysis)
1187       Result = false;
1188     else
1189       return false;
1190   }
1191 
1192   // Check if we can vectorize the instructions and CFG in this loop.
1193   if (!canVectorizeInstrs()) {
1194     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1195     if (DoExtraAnalysis)
1196       Result = false;
1197     else
1198       return false;
1199   }
1200 
1201   // Go over each instruction and look at memory deps.
1202   if (!canVectorizeMemory()) {
1203     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1204     if (DoExtraAnalysis)
1205       Result = false;
1206     else
1207       return false;
1208   }
1209 
1210   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1211                     << (LAI->getRuntimePointerChecking()->Need
1212                             ? " (with a runtime bound check)"
1213                             : "")
1214                     << "!\n");
1215 
1216   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1217   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1218     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1219 
1220   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1221     reportVectorizationFailure("Too many SCEV checks needed",
1222         "Too many SCEV assumptions need to be made and checked at runtime",
1223         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1224     if (DoExtraAnalysis)
1225       Result = false;
1226     else
1227       return false;
1228   }
1229 
1230   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1231   // we can vectorize, and at this point we don't have any other mem analysis
1232   // which may limit our maximum vectorization factor, so just return true with
1233   // no restrictions.
1234   return Result;
1235 }
1236 
1237 bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1238 
1239   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1240 
1241   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1242 
1243   for (auto &Reduction : getReductionVars())
1244     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1245 
1246   // TODO: handle non-reduction outside users when tail is folded by masking.
1247   for (auto *AE : AllowedExit) {
1248     // Check that all users of allowed exit values are inside the loop or
1249     // are the live-out of a reduction.
1250     if (ReductionLiveOuts.count(AE))
1251       continue;
1252     for (User *U : AE->users()) {
1253       Instruction *UI = cast<Instruction>(U);
1254       if (TheLoop->contains(UI))
1255         continue;
1256       reportVectorizationFailure(
1257           "Cannot fold tail by masking, loop has an outside user for",
1258           "Cannot fold tail by masking in the presence of live outs.",
1259           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1260       return false;
1261     }
1262   }
1263 
1264   // The list of pointers that we can safely read and write to remains empty.
1265   SmallPtrSet<Value *, 8> SafePointers;
1266 
1267   // Check and mark all blocks for predication, including those that ordinarily
1268   // do not need predication such as the header block.
1269   for (BasicBlock *BB : TheLoop->blocks()) {
1270     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1271       reportVectorizationFailure(
1272           "Cannot fold tail by masking as required",
1273           "control flow cannot be substituted for a select",
1274           "NoCFGForSelect", ORE, TheLoop,
1275           BB->getTerminator());
1276       return false;
1277     }
1278   }
1279 
1280   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1281   return true;
1282 }
1283 
1284 } // namespace llvm
1285