1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file provides loop vectorization legality analysis. Original code
10 // resided in LoopVectorize.cpp for a long time.
11 //
12 // At this point, it is implemented as a utility class, not as an analysis
13 // pass. It should be easy to create an analysis pass around it if there
14 // is a need (but D45420 needs to happen first).
15 //
16 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
17 #include "llvm/Analysis/Loads.h"
18 #include "llvm/Analysis/LoopInfo.h"
19 #include "llvm/Analysis/ValueTracking.h"
20 #include "llvm/Analysis/VectorUtils.h"
21 #include "llvm/IR/IntrinsicInst.h"
22 #include "llvm/IR/PatternMatch.h"
23 #include "llvm/Transforms/Utils/SizeOpts.h"
24 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
25 
26 using namespace llvm;
27 using namespace PatternMatch;
28 
29 #define LV_NAME "loop-vectorize"
30 #define DEBUG_TYPE LV_NAME
31 
32 extern cl::opt<bool> EnableVPlanPredication;
33 
34 static cl::opt<bool>
35     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
36                        cl::desc("Enable if-conversion during vectorization."));
37 
38 static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
39     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
40     cl::desc("The maximum allowed number of runtime memory checks with a "
41              "vectorize(enable) pragma."));
42 
43 static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
44     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
45     cl::desc("The maximum number of SCEV checks allowed."));
46 
47 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
48     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
49     cl::desc("The maximum number of SCEV checks allowed with a "
50              "vectorize(enable) pragma"));
51 
52 /// Maximum vectorization interleave count.
53 static const unsigned MaxInterleaveFactor = 16;
54 
55 namespace llvm {
56 
57 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
58   switch (Kind) {
59   case HK_WIDTH:
60     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
61   case HK_UNROLL:
62     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
63   case HK_FORCE:
64     return (Val <= 1);
65   case HK_ISVECTORIZED:
66   case HK_PREDICATE:
67     return (Val == 0 || Val == 1);
68   }
69   return false;
70 }
71 
72 LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
73                                        bool InterleaveOnlyWhenForced,
74                                        OptimizationRemarkEmitter &ORE)
75     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
76       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
77       Force("vectorize.enable", FK_Undefined, HK_FORCE),
78       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
79       Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
80       ORE(ORE) {
81   // Populate values with existing loop metadata.
82   getHintsFromMetadata();
83 
84   // force-vector-interleave overrides DisableInterleaving.
85   if (VectorizerParams::isInterleaveForced())
86     Interleave.Value = VectorizerParams::VectorizationInterleave;
87 
88   if (IsVectorized.Value != 1)
89     // If the vectorization width and interleaving count are both 1 then
90     // consider the loop to have been already vectorized because there's
91     // nothing more that we can do.
92     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
93   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
94              << "LV: Interleaving disabled by the pass manager\n");
95 }
96 
97 void LoopVectorizeHints::setAlreadyVectorized() {
98   LLVMContext &Context = TheLoop->getHeader()->getContext();
99 
100   MDNode *IsVectorizedMD = MDNode::get(
101       Context,
102       {MDString::get(Context, "llvm.loop.isvectorized"),
103        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
104   MDNode *LoopID = TheLoop->getLoopID();
105   MDNode *NewLoopID =
106       makePostTransformationMetadata(Context, LoopID,
107                                      {Twine(Prefix(), "vectorize.").str(),
108                                       Twine(Prefix(), "interleave.").str()},
109                                      {IsVectorizedMD});
110   TheLoop->setLoopID(NewLoopID);
111 
112   // Update internal cache.
113   IsVectorized.Value = 1;
114 }
115 
116 bool LoopVectorizeHints::allowVectorization(
117     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
118   if (getForce() == LoopVectorizeHints::FK_Disabled) {
119     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
120     emitRemarkWithHints();
121     return false;
122   }
123 
124   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
125     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
126     emitRemarkWithHints();
127     return false;
128   }
129 
130   if (getIsVectorized() == 1) {
131     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
132     // FIXME: Add interleave.disable metadata. This will allow
133     // vectorize.disable to be used without disabling the pass and errors
134     // to differentiate between disabled vectorization and a width of 1.
135     ORE.emit([&]() {
136       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
137                                         "AllDisabled", L->getStartLoc(),
138                                         L->getHeader())
139              << "loop not vectorized: vectorization and interleaving are "
140                 "explicitly disabled, or the loop has already been "
141                 "vectorized";
142     });
143     return false;
144   }
145 
146   return true;
147 }
148 
149 void LoopVectorizeHints::emitRemarkWithHints() const {
150   using namespace ore;
151 
152   ORE.emit([&]() {
153     if (Force.Value == LoopVectorizeHints::FK_Disabled)
154       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
155                                       TheLoop->getStartLoc(),
156                                       TheLoop->getHeader())
157              << "loop not vectorized: vectorization is explicitly disabled";
158     else {
159       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
160                                  TheLoop->getStartLoc(), TheLoop->getHeader());
161       R << "loop not vectorized";
162       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
163         R << " (Force=" << NV("Force", true);
164         if (Width.Value != 0)
165           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
166         if (Interleave.Value != 0)
167           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
168         R << ")";
169       }
170       return R;
171     }
172   });
173 }
174 
175 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
176   if (getWidth() == 1)
177     return LV_NAME;
178   if (getForce() == LoopVectorizeHints::FK_Disabled)
179     return LV_NAME;
180   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
181     return LV_NAME;
182   return OptimizationRemarkAnalysis::AlwaysPrint;
183 }
184 
185 void LoopVectorizeHints::getHintsFromMetadata() {
186   MDNode *LoopID = TheLoop->getLoopID();
187   if (!LoopID)
188     return;
189 
190   // First operand should refer to the loop id itself.
191   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
192   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
193 
194   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
195     const MDString *S = nullptr;
196     SmallVector<Metadata *, 4> Args;
197 
198     // The expected hint is either a MDString or a MDNode with the first
199     // operand a MDString.
200     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
201       if (!MD || MD->getNumOperands() == 0)
202         continue;
203       S = dyn_cast<MDString>(MD->getOperand(0));
204       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
205         Args.push_back(MD->getOperand(i));
206     } else {
207       S = dyn_cast<MDString>(LoopID->getOperand(i));
208       assert(Args.size() == 0 && "too many arguments for MDString");
209     }
210 
211     if (!S)
212       continue;
213 
214     // Check if the hint starts with the loop metadata prefix.
215     StringRef Name = S->getString();
216     if (Args.size() == 1)
217       setHint(Name, Args[0]);
218   }
219 }
220 
221 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
222   if (!Name.startswith(Prefix()))
223     return;
224   Name = Name.substr(Prefix().size(), StringRef::npos);
225 
226   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
227   if (!C)
228     return;
229   unsigned Val = C->getZExtValue();
230 
231   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
232   for (auto H : Hints) {
233     if (Name == H->Name) {
234       if (H->validate(Val))
235         H->Value = Val;
236       else
237         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
238       break;
239     }
240   }
241 }
242 
243 bool LoopVectorizationRequirements::doesNotMeet(
244     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
245   const char *PassName = Hints.vectorizeAnalysisPassName();
246   bool Failed = false;
247   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
248     ORE.emit([&]() {
249       return OptimizationRemarkAnalysisFPCommute(
250                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
251                  UnsafeAlgebraInst->getParent())
252              << "loop not vectorized: cannot prove it is safe to reorder "
253                 "floating-point operations";
254     });
255     Failed = true;
256   }
257 
258   // Test if runtime memcheck thresholds are exceeded.
259   bool PragmaThresholdReached =
260       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
261   bool ThresholdReached =
262       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
263   if ((ThresholdReached && !Hints.allowReordering()) ||
264       PragmaThresholdReached) {
265     ORE.emit([&]() {
266       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
267                                                 L->getStartLoc(),
268                                                 L->getHeader())
269              << "loop not vectorized: cannot prove it is safe to reorder "
270                 "memory operations";
271     });
272     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
273     Failed = true;
274   }
275 
276   return Failed;
277 }
278 
279 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
280 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
281 // executing the inner loop will execute the same iterations). This check is
282 // very constrained for now but it will be relaxed in the future. \p Lp is
283 // considered uniform if it meets all the following conditions:
284 //   1) it has a canonical IV (starting from 0 and with stride 1),
285 //   2) its latch terminator is a conditional branch and,
286 //   3) its latch condition is a compare instruction whose operands are the
287 //      canonical IV and an OuterLp invariant.
288 // This check doesn't take into account the uniformity of other conditions not
289 // related to the loop latch because they don't affect the loop uniformity.
290 //
291 // NOTE: We decided to keep all these checks and its associated documentation
292 // together so that we can easily have a picture of the current supported loop
293 // nests. However, some of the current checks don't depend on \p OuterLp and
294 // would be redundantly executed for each \p Lp if we invoked this function for
295 // different candidate outer loops. This is not the case for now because we
296 // don't currently have the infrastructure to evaluate multiple candidate outer
297 // loops and \p OuterLp will be a fixed parameter while we only support explicit
298 // outer loop vectorization. It's also very likely that these checks go away
299 // before introducing the aforementioned infrastructure. However, if this is not
300 // the case, we should move the \p OuterLp independent checks to a separate
301 // function that is only executed once for each \p Lp.
302 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
303   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
304 
305   // If Lp is the outer loop, it's uniform by definition.
306   if (Lp == OuterLp)
307     return true;
308   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
309 
310   // 1.
311   PHINode *IV = Lp->getCanonicalInductionVariable();
312   if (!IV) {
313     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
314     return false;
315   }
316 
317   // 2.
318   BasicBlock *Latch = Lp->getLoopLatch();
319   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
320   if (!LatchBr || LatchBr->isUnconditional()) {
321     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
322     return false;
323   }
324 
325   // 3.
326   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
327   if (!LatchCmp) {
328     LLVM_DEBUG(
329         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
330     return false;
331   }
332 
333   Value *CondOp0 = LatchCmp->getOperand(0);
334   Value *CondOp1 = LatchCmp->getOperand(1);
335   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
336   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
337       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
338     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
339     return false;
340   }
341 
342   return true;
343 }
344 
345 // Return true if \p Lp and all its nested loops are uniform with regard to \p
346 // OuterLp.
347 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
348   if (!isUniformLoop(Lp, OuterLp))
349     return false;
350 
351   // Check if nested loops are uniform.
352   for (Loop *SubLp : *Lp)
353     if (!isUniformLoopNest(SubLp, OuterLp))
354       return false;
355 
356   return true;
357 }
358 
359 /// Check whether it is safe to if-convert this phi node.
360 ///
361 /// Phi nodes with constant expressions that can trap are not safe to if
362 /// convert.
363 static bool canIfConvertPHINodes(BasicBlock *BB) {
364   for (PHINode &Phi : BB->phis()) {
365     for (Value *V : Phi.incoming_values())
366       if (auto *C = dyn_cast<Constant>(V))
367         if (C->canTrap())
368           return false;
369   }
370   return true;
371 }
372 
373 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
374   if (Ty->isPointerTy())
375     return DL.getIntPtrType(Ty);
376 
377   // It is possible that char's or short's overflow when we ask for the loop's
378   // trip count, work around this by changing the type size.
379   if (Ty->getScalarSizeInBits() < 32)
380     return Type::getInt32Ty(Ty->getContext());
381 
382   return Ty;
383 }
384 
385 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
386   Ty0 = convertPointerToIntegerType(DL, Ty0);
387   Ty1 = convertPointerToIntegerType(DL, Ty1);
388   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
389     return Ty0;
390   return Ty1;
391 }
392 
393 /// Check that the instruction has outside loop users and is not an
394 /// identified reduction variable.
395 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
396                                SmallPtrSetImpl<Value *> &AllowedExit) {
397   // Reductions, Inductions and non-header phis are allowed to have exit users. All
398   // other instructions must not have external users.
399   if (!AllowedExit.count(Inst))
400     // Check that all of the users of the loop are inside the BB.
401     for (User *U : Inst->users()) {
402       Instruction *UI = cast<Instruction>(U);
403       // This user may be a reduction exit value.
404       if (!TheLoop->contains(UI)) {
405         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
406         return true;
407       }
408     }
409   return false;
410 }
411 
412 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
413   const ValueToValueMap &Strides =
414       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
415 
416   Function *F = TheLoop->getHeader()->getParent();
417   bool OptForSize = F->hasOptSize() ||
418                     llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
419                                                 PGSOQueryType::IRPass);
420   bool CanAddPredicate = !OptForSize;
421   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
422   if (Stride == 1 || Stride == -1)
423     return Stride;
424   return 0;
425 }
426 
427 bool LoopVectorizationLegality::isUniform(Value *V) {
428   return LAI->isUniform(V);
429 }
430 
431 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
432   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
433   // Store the result and return it at the end instead of exiting early, in case
434   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
435   bool Result = true;
436   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
437 
438   for (BasicBlock *BB : TheLoop->blocks()) {
439     // Check whether the BB terminator is a BranchInst. Any other terminator is
440     // not supported yet.
441     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
442     if (!Br) {
443       reportVectorizationFailure("Unsupported basic block terminator",
444           "loop control flow is not understood by vectorizer",
445           "CFGNotUnderstood", ORE, TheLoop);
446       if (DoExtraAnalysis)
447         Result = false;
448       else
449         return false;
450     }
451 
452     // Check whether the BranchInst is a supported one. Only unconditional
453     // branches, conditional branches with an outer loop invariant condition or
454     // backedges are supported.
455     // FIXME: We skip these checks when VPlan predication is enabled as we
456     // want to allow divergent branches. This whole check will be removed
457     // once VPlan predication is on by default.
458     if (!EnableVPlanPredication && Br && Br->isConditional() &&
459         !TheLoop->isLoopInvariant(Br->getCondition()) &&
460         !LI->isLoopHeader(Br->getSuccessor(0)) &&
461         !LI->isLoopHeader(Br->getSuccessor(1))) {
462       reportVectorizationFailure("Unsupported conditional branch",
463           "loop control flow is not understood by vectorizer",
464           "CFGNotUnderstood", ORE, TheLoop);
465       if (DoExtraAnalysis)
466         Result = false;
467       else
468         return false;
469     }
470   }
471 
472   // Check whether inner loops are uniform. At this point, we only support
473   // simple outer loops scenarios with uniform nested loops.
474   if (!isUniformLoopNest(TheLoop /*loop nest*/,
475                          TheLoop /*context outer loop*/)) {
476     reportVectorizationFailure("Outer loop contains divergent loops",
477         "loop control flow is not understood by vectorizer",
478         "CFGNotUnderstood", ORE, TheLoop);
479     if (DoExtraAnalysis)
480       Result = false;
481     else
482       return false;
483   }
484 
485   // Check whether we are able to set up outer loop induction.
486   if (!setupOuterLoopInductions()) {
487     reportVectorizationFailure("Unsupported outer loop Phi(s)",
488                                "Unsupported outer loop Phi(s)",
489                                "UnsupportedPhi", ORE, TheLoop);
490     if (DoExtraAnalysis)
491       Result = false;
492     else
493       return false;
494   }
495 
496   return Result;
497 }
498 
499 void LoopVectorizationLegality::addInductionPhi(
500     PHINode *Phi, const InductionDescriptor &ID,
501     SmallPtrSetImpl<Value *> &AllowedExit) {
502   Inductions[Phi] = ID;
503 
504   // In case this induction also comes with casts that we know we can ignore
505   // in the vectorized loop body, record them here. All casts could be recorded
506   // here for ignoring, but suffices to record only the first (as it is the
507   // only one that may bw used outside the cast sequence).
508   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
509   if (!Casts.empty())
510     InductionCastsToIgnore.insert(*Casts.begin());
511 
512   Type *PhiTy = Phi->getType();
513   const DataLayout &DL = Phi->getModule()->getDataLayout();
514 
515   // Get the widest type.
516   if (!PhiTy->isFloatingPointTy()) {
517     if (!WidestIndTy)
518       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
519     else
520       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
521   }
522 
523   // Int inductions are special because we only allow one IV.
524   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
525       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
526       isa<Constant>(ID.getStartValue()) &&
527       cast<Constant>(ID.getStartValue())->isNullValue()) {
528 
529     // Use the phi node with the widest type as induction. Use the last
530     // one if there are multiple (no good reason for doing this other
531     // than it is expedient). We've checked that it begins at zero and
532     // steps by one, so this is a canonical induction variable.
533     if (!PrimaryInduction || PhiTy == WidestIndTy)
534       PrimaryInduction = Phi;
535   }
536 
537   // Both the PHI node itself, and the "post-increment" value feeding
538   // back into the PHI node may have external users.
539   // We can allow those uses, except if the SCEVs we have for them rely
540   // on predicates that only hold within the loop, since allowing the exit
541   // currently means re-using this SCEV outside the loop (see PR33706 for more
542   // details).
543   if (PSE.getUnionPredicate().isAlwaysTrue()) {
544     AllowedExit.insert(Phi);
545     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
546   }
547 
548   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
549 }
550 
551 bool LoopVectorizationLegality::setupOuterLoopInductions() {
552   BasicBlock *Header = TheLoop->getHeader();
553 
554   // Returns true if a given Phi is a supported induction.
555   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
556     InductionDescriptor ID;
557     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
558         ID.getKind() == InductionDescriptor::IK_IntInduction) {
559       addInductionPhi(&Phi, ID, AllowedExit);
560       return true;
561     } else {
562       // Bail out for any Phi in the outer loop header that is not a supported
563       // induction.
564       LLVM_DEBUG(
565           dbgs()
566           << "LV: Found unsupported PHI for outer loop vectorization.\n");
567       return false;
568     }
569   };
570 
571   if (llvm::all_of(Header->phis(), isSupportedPhi))
572     return true;
573   else
574     return false;
575 }
576 
577 /// Checks if a function is scalarizable according to the TLI, in
578 /// the sense that it should be vectorized and then expanded in
579 /// multiple scalarcalls. This is represented in the
580 /// TLI via mappings that do not specify a vector name, as in the
581 /// following example:
582 ///
583 ///    const VecDesc VecIntrinsics[] = {
584 ///      {"llvm.phx.abs.i32", "", 4}
585 ///    };
586 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
587   const StringRef ScalarName = CI.getCalledFunction()->getName();
588   bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
589   // Check that all known VFs are not associated to a vector
590   // function, i.e. the vector name is emty.
591   if (Scalarize)
592     for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName);
593          VF <= WidestVF; VF *= 2) {
594       Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
595     }
596   return Scalarize;
597 }
598 
599 bool LoopVectorizationLegality::canVectorizeInstrs() {
600   BasicBlock *Header = TheLoop->getHeader();
601 
602   // Look for the attribute signaling the absence of NaNs.
603   Function &F = *Header->getParent();
604   HasFunNoNaNAttr =
605       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
606 
607   // For each block in the loop.
608   for (BasicBlock *BB : TheLoop->blocks()) {
609     // Scan the instructions in the block and look for hazards.
610     for (Instruction &I : *BB) {
611       if (auto *Phi = dyn_cast<PHINode>(&I)) {
612         Type *PhiTy = Phi->getType();
613         // Check that this PHI type is allowed.
614         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
615             !PhiTy->isPointerTy()) {
616           reportVectorizationFailure("Found a non-int non-pointer PHI",
617                                      "loop control flow is not understood by vectorizer",
618                                      "CFGNotUnderstood", ORE, TheLoop);
619           return false;
620         }
621 
622         // If this PHINode is not in the header block, then we know that we
623         // can convert it to select during if-conversion. No need to check if
624         // the PHIs in this block are induction or reduction variables.
625         if (BB != Header) {
626           // Non-header phi nodes that have outside uses can be vectorized. Add
627           // them to the list of allowed exits.
628           // Unsafe cyclic dependencies with header phis are identified during
629           // legalization for reduction, induction and first order
630           // recurrences.
631           AllowedExit.insert(&I);
632           continue;
633         }
634 
635         // We only allow if-converted PHIs with exactly two incoming values.
636         if (Phi->getNumIncomingValues() != 2) {
637           reportVectorizationFailure("Found an invalid PHI",
638               "loop control flow is not understood by vectorizer",
639               "CFGNotUnderstood", ORE, TheLoop, Phi);
640           return false;
641         }
642 
643         RecurrenceDescriptor RedDes;
644         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
645                                                  DT)) {
646           if (RedDes.hasUnsafeAlgebra())
647             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
648           AllowedExit.insert(RedDes.getLoopExitInstr());
649           Reductions[Phi] = RedDes;
650           continue;
651         }
652 
653         // TODO: Instead of recording the AllowedExit, it would be good to record the
654         // complementary set: NotAllowedExit. These include (but may not be
655         // limited to):
656         // 1. Reduction phis as they represent the one-before-last value, which
657         // is not available when vectorized
658         // 2. Induction phis and increment when SCEV predicates cannot be used
659         // outside the loop - see addInductionPhi
660         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
661         // outside the loop - see call to hasOutsideLoopUser in the non-phi
662         // handling below
663         // 4. FirstOrderRecurrence phis that can possibly be handled by
664         // extraction.
665         // By recording these, we can then reason about ways to vectorize each
666         // of these NotAllowedExit.
667         InductionDescriptor ID;
668         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
669           addInductionPhi(Phi, ID, AllowedExit);
670           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
671             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
672           continue;
673         }
674 
675         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
676                                                          SinkAfter, DT)) {
677           AllowedExit.insert(Phi);
678           FirstOrderRecurrences.insert(Phi);
679           continue;
680         }
681 
682         // As a last resort, coerce the PHI to a AddRec expression
683         // and re-try classifying it a an induction PHI.
684         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
685           addInductionPhi(Phi, ID, AllowedExit);
686           continue;
687         }
688 
689         reportVectorizationFailure("Found an unidentified PHI",
690             "value that could not be identified as "
691             "reduction is used outside the loop",
692             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
693         return false;
694       } // end of PHI handling
695 
696       // We handle calls that:
697       //   * Are debug info intrinsics.
698       //   * Have a mapping to an IR intrinsic.
699       //   * Have a vector version available.
700       auto *CI = dyn_cast<CallInst>(&I);
701 
702       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
703           !isa<DbgInfoIntrinsic>(CI) &&
704           !(CI->getCalledFunction() && TLI &&
705             (!VFDatabase::getMappings(*CI).empty() ||
706              isTLIScalarize(*TLI, *CI)))) {
707         // If the call is a recognized math libary call, it is likely that
708         // we can vectorize it given loosened floating-point constraints.
709         LibFunc Func;
710         bool IsMathLibCall =
711             TLI && CI->getCalledFunction() &&
712             CI->getType()->isFloatingPointTy() &&
713             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
714             TLI->hasOptimizedCodeGen(Func);
715 
716         if (IsMathLibCall) {
717           // TODO: Ideally, we should not use clang-specific language here,
718           // but it's hard to provide meaningful yet generic advice.
719           // Also, should this be guarded by allowExtraAnalysis() and/or be part
720           // of the returned info from isFunctionVectorizable()?
721           reportVectorizationFailure(
722               "Found a non-intrinsic callsite",
723               "library call cannot be vectorized. "
724               "Try compiling with -fno-math-errno, -ffast-math, "
725               "or similar flags",
726               "CantVectorizeLibcall", ORE, TheLoop, CI);
727         } else {
728           reportVectorizationFailure("Found a non-intrinsic callsite",
729                                      "call instruction cannot be vectorized",
730                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
731         }
732         return false;
733       }
734 
735       // Some intrinsics have scalar arguments and should be same in order for
736       // them to be vectorized (i.e. loop invariant).
737       if (CI) {
738         auto *SE = PSE.getSE();
739         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
740         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
741           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
742             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
743               reportVectorizationFailure("Found unvectorizable intrinsic",
744                   "intrinsic instruction cannot be vectorized",
745                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
746               return false;
747             }
748           }
749       }
750 
751       // Check that the instruction return type is vectorizable.
752       // Also, we can't vectorize extractelement instructions.
753       if ((!VectorType::isValidElementType(I.getType()) &&
754            !I.getType()->isVoidTy()) ||
755           isa<ExtractElementInst>(I)) {
756         reportVectorizationFailure("Found unvectorizable type",
757             "instruction return type cannot be vectorized",
758             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
759         return false;
760       }
761 
762       // Check that the stored type is vectorizable.
763       if (auto *ST = dyn_cast<StoreInst>(&I)) {
764         Type *T = ST->getValueOperand()->getType();
765         if (!VectorType::isValidElementType(T)) {
766           reportVectorizationFailure("Store instruction cannot be vectorized",
767                                      "store instruction cannot be vectorized",
768                                      "CantVectorizeStore", ORE, TheLoop, ST);
769           return false;
770         }
771 
772         // For nontemporal stores, check that a nontemporal vector version is
773         // supported on the target.
774         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
775           // Arbitrarily try a vector of 2 elements.
776           auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2);
777           assert(VecTy && "did not find vectorized version of stored type");
778           if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
779             reportVectorizationFailure(
780                 "nontemporal store instruction cannot be vectorized",
781                 "nontemporal store instruction cannot be vectorized",
782                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
783             return false;
784           }
785         }
786 
787       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
788         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
789           // For nontemporal loads, check that a nontemporal vector version is
790           // supported on the target (arbitrarily try a vector of 2 elements).
791           auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2);
792           assert(VecTy && "did not find vectorized version of load type");
793           if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
794             reportVectorizationFailure(
795                 "nontemporal load instruction cannot be vectorized",
796                 "nontemporal load instruction cannot be vectorized",
797                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
798             return false;
799           }
800         }
801 
802         // FP instructions can allow unsafe algebra, thus vectorizable by
803         // non-IEEE-754 compliant SIMD units.
804         // This applies to floating-point math operations and calls, not memory
805         // operations, shuffles, or casts, as they don't change precision or
806         // semantics.
807       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
808                  !I.isFast()) {
809         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
810         Hints->setPotentiallyUnsafe();
811       }
812 
813       // Reduction instructions are allowed to have exit users.
814       // All other instructions must not have external users.
815       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
816         // We can safely vectorize loops where instructions within the loop are
817         // used outside the loop only if the SCEV predicates within the loop is
818         // same as outside the loop. Allowing the exit means reusing the SCEV
819         // outside the loop.
820         if (PSE.getUnionPredicate().isAlwaysTrue()) {
821           AllowedExit.insert(&I);
822           continue;
823         }
824         reportVectorizationFailure("Value cannot be used outside the loop",
825                                    "value cannot be used outside the loop",
826                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
827         return false;
828       }
829     } // next instr.
830   }
831 
832   if (!PrimaryInduction) {
833     if (Inductions.empty()) {
834       reportVectorizationFailure("Did not find one integer induction var",
835           "loop induction variable could not be identified",
836           "NoInductionVariable", ORE, TheLoop);
837       return false;
838     } else if (!WidestIndTy) {
839       reportVectorizationFailure("Did not find one integer induction var",
840           "integer loop induction variable could not be identified",
841           "NoIntegerInductionVariable", ORE, TheLoop);
842       return false;
843     } else {
844       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
845     }
846   }
847 
848   // For first order recurrences, we use the previous value (incoming value from
849   // the latch) to check if it dominates all users of the recurrence. Bail out
850   // if we have to sink such an instruction for another recurrence, as the
851   // dominance requirement may not hold after sinking.
852   BasicBlock *LoopLatch = TheLoop->getLoopLatch();
853   if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
854         Instruction *V =
855             cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
856         return SinkAfter.find(V) != SinkAfter.end();
857       }))
858     return false;
859 
860   // Now we know the widest induction type, check if our found induction
861   // is the same size. If it's not, unset it here and InnerLoopVectorizer
862   // will create another.
863   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
864     PrimaryInduction = nullptr;
865 
866   return true;
867 }
868 
869 bool LoopVectorizationLegality::canVectorizeMemory() {
870   LAI = &(*GetLAA)(*TheLoop);
871   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
872   if (LAR) {
873     ORE->emit([&]() {
874       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
875                                         "loop not vectorized: ", *LAR);
876     });
877   }
878   if (!LAI->canVectorizeMemory())
879     return false;
880 
881   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
882     reportVectorizationFailure("Stores to a uniform address",
883         "write to a loop invariant address could not be vectorized",
884         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
885     return false;
886   }
887   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
888   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
889 
890   return true;
891 }
892 
893 bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
894   Value *In0 = const_cast<Value *>(V);
895   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
896   if (!PN)
897     return false;
898 
899   return Inductions.count(PN);
900 }
901 
902 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
903   auto *Inst = dyn_cast<Instruction>(V);
904   return (Inst && InductionCastsToIgnore.count(Inst));
905 }
906 
907 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
908   return isInductionPhi(V) || isCastedInductionVariable(V);
909 }
910 
911 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
912   return FirstOrderRecurrences.count(Phi);
913 }
914 
915 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
916   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
917 }
918 
919 bool LoopVectorizationLegality::blockCanBePredicated(
920     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
921   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
922 
923   for (Instruction &I : *BB) {
924     // Check that we don't have a constant expression that can trap as operand.
925     for (Value *Operand : I.operands()) {
926       if (auto *C = dyn_cast<Constant>(Operand))
927         if (C->canTrap())
928           return false;
929     }
930 
931     // We can predicate blocks with calls to assume, as long as we drop them in
932     // case we flatten the CFG via predication.
933     if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
934       ConditionalAssumes.insert(&I);
935       continue;
936     }
937 
938     // We might be able to hoist the load.
939     if (I.mayReadFromMemory()) {
940       auto *LI = dyn_cast<LoadInst>(&I);
941       if (!LI)
942         return false;
943       if (!SafePtrs.count(LI->getPointerOperand())) {
944         // !llvm.mem.parallel_loop_access implies if-conversion safety.
945         // Otherwise, record that the load needs (real or emulated) masking
946         // and let the cost model decide.
947         if (!IsAnnotatedParallel || PreserveGuards)
948           MaskedOp.insert(LI);
949         continue;
950       }
951     }
952 
953     if (I.mayWriteToMemory()) {
954       auto *SI = dyn_cast<StoreInst>(&I);
955       if (!SI)
956         return false;
957       // Predicated store requires some form of masking:
958       // 1) masked store HW instruction,
959       // 2) emulation via load-blend-store (only if safe and legal to do so,
960       //    be aware on the race conditions), or
961       // 3) element-by-element predicate check and scalar store.
962       MaskedOp.insert(SI);
963       continue;
964     }
965     if (I.mayThrow())
966       return false;
967   }
968 
969   return true;
970 }
971 
972 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
973   if (!EnableIfConversion) {
974     reportVectorizationFailure("If-conversion is disabled",
975                                "if-conversion is disabled",
976                                "IfConversionDisabled",
977                                ORE, TheLoop);
978     return false;
979   }
980 
981   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
982 
983   // A list of pointers which are known to be dereferenceable within scope of
984   // the loop body for each iteration of the loop which executes.  That is,
985   // the memory pointed to can be dereferenced (with the access size implied by
986   // the value's type) unconditionally within the loop header without
987   // introducing a new fault.
988   SmallPtrSet<Value *, 8> SafePointers;
989 
990   // Collect safe addresses.
991   for (BasicBlock *BB : TheLoop->blocks()) {
992     if (!blockNeedsPredication(BB)) {
993       for (Instruction &I : *BB)
994         if (auto *Ptr = getLoadStorePointerOperand(&I))
995           SafePointers.insert(Ptr);
996       continue;
997     }
998 
999     // For a block which requires predication, a address may be safe to access
1000     // in the loop w/o predication if we can prove dereferenceability facts
1001     // sufficient to ensure it'll never fault within the loop. For the moment,
1002     // we restrict this to loads; stores are more complicated due to
1003     // concurrency restrictions.
1004     ScalarEvolution &SE = *PSE.getSE();
1005     for (Instruction &I : *BB) {
1006       LoadInst *LI = dyn_cast<LoadInst>(&I);
1007       if (LI && !mustSuppressSpeculation(*LI) &&
1008           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
1009         SafePointers.insert(LI->getPointerOperand());
1010     }
1011   }
1012 
1013   // Collect the blocks that need predication.
1014   BasicBlock *Header = TheLoop->getHeader();
1015   for (BasicBlock *BB : TheLoop->blocks()) {
1016     // We don't support switch statements inside loops.
1017     if (!isa<BranchInst>(BB->getTerminator())) {
1018       reportVectorizationFailure("Loop contains a switch statement",
1019                                  "loop contains a switch statement",
1020                                  "LoopContainsSwitch", ORE, TheLoop,
1021                                  BB->getTerminator());
1022       return false;
1023     }
1024 
1025     // We must be able to predicate all blocks that need to be predicated.
1026     if (blockNeedsPredication(BB)) {
1027       if (!blockCanBePredicated(BB, SafePointers)) {
1028         reportVectorizationFailure(
1029             "Control flow cannot be substituted for a select",
1030             "control flow cannot be substituted for a select",
1031             "NoCFGForSelect", ORE, TheLoop,
1032             BB->getTerminator());
1033         return false;
1034       }
1035     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
1036       reportVectorizationFailure(
1037           "Control flow cannot be substituted for a select",
1038           "control flow cannot be substituted for a select",
1039           "NoCFGForSelect", ORE, TheLoop,
1040           BB->getTerminator());
1041       return false;
1042     }
1043   }
1044 
1045   // We can if-convert this loop.
1046   return true;
1047 }
1048 
1049 // Helper function to canVectorizeLoopNestCFG.
1050 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1051                                                     bool UseVPlanNativePath) {
1052   assert((UseVPlanNativePath || Lp->empty()) &&
1053          "VPlan-native path is not enabled.");
1054 
1055   // TODO: ORE should be improved to show more accurate information when an
1056   // outer loop can't be vectorized because a nested loop is not understood or
1057   // legal. Something like: "outer_loop_location: loop not vectorized:
1058   // (inner_loop_location) loop control flow is not understood by vectorizer".
1059 
1060   // Store the result and return it at the end instead of exiting early, in case
1061   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1062   bool Result = true;
1063   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1064 
1065   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1066   // be canonicalized.
1067   if (!Lp->getLoopPreheader()) {
1068     reportVectorizationFailure("Loop doesn't have a legal pre-header",
1069         "loop control flow is not understood by vectorizer",
1070         "CFGNotUnderstood", ORE, TheLoop);
1071     if (DoExtraAnalysis)
1072       Result = false;
1073     else
1074       return false;
1075   }
1076 
1077   // We must have a single backedge.
1078   if (Lp->getNumBackEdges() != 1) {
1079     reportVectorizationFailure("The loop must have a single backedge",
1080         "loop control flow is not understood by vectorizer",
1081         "CFGNotUnderstood", ORE, TheLoop);
1082     if (DoExtraAnalysis)
1083       Result = false;
1084     else
1085       return false;
1086   }
1087 
1088   // We must have a single exiting block.
1089   if (!Lp->getExitingBlock()) {
1090     reportVectorizationFailure("The loop must have an exiting block",
1091         "loop control flow is not understood by vectorizer",
1092         "CFGNotUnderstood", ORE, TheLoop);
1093     if (DoExtraAnalysis)
1094       Result = false;
1095     else
1096       return false;
1097   }
1098 
1099   // We only handle bottom-tested loops, i.e. loop in which the condition is
1100   // checked at the end of each iteration. With that we can assume that all
1101   // instructions in the loop are executed the same number of times.
1102   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
1103     reportVectorizationFailure("The exiting block is not the loop latch",
1104         "loop control flow is not understood by vectorizer",
1105         "CFGNotUnderstood", ORE, TheLoop);
1106     if (DoExtraAnalysis)
1107       Result = false;
1108     else
1109       return false;
1110   }
1111 
1112   return Result;
1113 }
1114 
1115 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1116     Loop *Lp, bool UseVPlanNativePath) {
1117   // Store the result and return it at the end instead of exiting early, in case
1118   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1119   bool Result = true;
1120   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1121   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1122     if (DoExtraAnalysis)
1123       Result = false;
1124     else
1125       return false;
1126   }
1127 
1128   // Recursively check whether the loop control flow of nested loops is
1129   // understood.
1130   for (Loop *SubLp : *Lp)
1131     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1132       if (DoExtraAnalysis)
1133         Result = false;
1134       else
1135         return false;
1136     }
1137 
1138   return Result;
1139 }
1140 
1141 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1142   // Store the result and return it at the end instead of exiting early, in case
1143   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1144   bool Result = true;
1145 
1146   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1147   // Check whether the loop-related control flow in the loop nest is expected by
1148   // vectorizer.
1149   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1150     if (DoExtraAnalysis)
1151       Result = false;
1152     else
1153       return false;
1154   }
1155 
1156   // We need to have a loop header.
1157   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1158                     << '\n');
1159 
1160   // Specific checks for outer loops. We skip the remaining legal checks at this
1161   // point because they don't support outer loops.
1162   if (!TheLoop->empty()) {
1163     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1164 
1165     if (!canVectorizeOuterLoop()) {
1166       reportVectorizationFailure("Unsupported outer loop",
1167                                  "unsupported outer loop",
1168                                  "UnsupportedOuterLoop",
1169                                  ORE, TheLoop);
1170       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1171       // outer loops.
1172       return false;
1173     }
1174 
1175     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1176     return Result;
1177   }
1178 
1179   assert(TheLoop->empty() && "Inner loop expected.");
1180   // Check if we can if-convert non-single-bb loops.
1181   unsigned NumBlocks = TheLoop->getNumBlocks();
1182   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1183     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1184     if (DoExtraAnalysis)
1185       Result = false;
1186     else
1187       return false;
1188   }
1189 
1190   // Check if we can vectorize the instructions and CFG in this loop.
1191   if (!canVectorizeInstrs()) {
1192     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1193     if (DoExtraAnalysis)
1194       Result = false;
1195     else
1196       return false;
1197   }
1198 
1199   // Go over each instruction and look at memory deps.
1200   if (!canVectorizeMemory()) {
1201     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1202     if (DoExtraAnalysis)
1203       Result = false;
1204     else
1205       return false;
1206   }
1207 
1208   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1209                     << (LAI->getRuntimePointerChecking()->Need
1210                             ? " (with a runtime bound check)"
1211                             : "")
1212                     << "!\n");
1213 
1214   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1215   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1216     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1217 
1218   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
1219     reportVectorizationFailure("Too many SCEV checks needed",
1220         "Too many SCEV assumptions need to be made and checked at runtime",
1221         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1222     if (DoExtraAnalysis)
1223       Result = false;
1224     else
1225       return false;
1226   }
1227 
1228   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1229   // we can vectorize, and at this point we don't have any other mem analysis
1230   // which may limit our maximum vectorization factor, so just return true with
1231   // no restrictions.
1232   return Result;
1233 }
1234 
1235 bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1236 
1237   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1238 
1239   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1240 
1241   for (auto &Reduction : getReductionVars())
1242     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1243 
1244   // TODO: handle non-reduction outside users when tail is folded by masking.
1245   for (auto *AE : AllowedExit) {
1246     // Check that all users of allowed exit values are inside the loop or
1247     // are the live-out of a reduction.
1248     if (ReductionLiveOuts.count(AE))
1249       continue;
1250     for (User *U : AE->users()) {
1251       Instruction *UI = cast<Instruction>(U);
1252       if (TheLoop->contains(UI))
1253         continue;
1254       reportVectorizationFailure(
1255           "Cannot fold tail by masking, loop has an outside user for",
1256           "Cannot fold tail by masking in the presence of live outs.",
1257           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1258       return false;
1259     }
1260   }
1261 
1262   // The list of pointers that we can safely read and write to remains empty.
1263   SmallPtrSet<Value *, 8> SafePointers;
1264 
1265   // Check and mark all blocks for predication, including those that ordinarily
1266   // do not need predication such as the header block.
1267   for (BasicBlock *BB : TheLoop->blocks()) {
1268     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
1269       reportVectorizationFailure(
1270           "Cannot fold tail by masking as required",
1271           "control flow cannot be substituted for a select",
1272           "NoCFGForSelect", ORE, TheLoop,
1273           BB->getTerminator());
1274       return false;
1275     }
1276   }
1277 
1278   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1279   return true;
1280 }
1281 
1282 } // namespace llvm
1283