1f2ec16ccSHideki Saito //===- LoopVectorizationLegality.cpp --------------------------------------===//
2f2ec16ccSHideki Saito //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6f2ec16ccSHideki Saito //
7f2ec16ccSHideki Saito //===----------------------------------------------------------------------===//
8f2ec16ccSHideki Saito //
9f2ec16ccSHideki Saito // This file provides loop vectorization legality analysis. Original code
10f2ec16ccSHideki Saito // resided in LoopVectorize.cpp for a long time.
11f2ec16ccSHideki Saito //
12f2ec16ccSHideki Saito // At this point, it is implemented as a utility class, not as an analysis
13f2ec16ccSHideki Saito // pass. It should be easy to create an analysis pass around it if there
14f2ec16ccSHideki Saito // is a need (but D45420 needs to happen first).
15f2ec16ccSHideki Saito //
16ec818d7fSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorize.h"
17f2ec16ccSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
187403569bSPhilip Reames #include "llvm/Analysis/Loads.h"
197403569bSPhilip Reames #include "llvm/Analysis/ValueTracking.h"
20f2ec16ccSHideki Saito #include "llvm/Analysis/VectorUtils.h"
21f2ec16ccSHideki Saito #include "llvm/IR/IntrinsicInst.h"
22f2ec16ccSHideki Saito 
23f2ec16ccSHideki Saito using namespace llvm;
24f2ec16ccSHideki Saito 
25f2ec16ccSHideki Saito #define LV_NAME "loop-vectorize"
26f2ec16ccSHideki Saito #define DEBUG_TYPE LV_NAME
27f2ec16ccSHideki Saito 
284e4ecae0SHideki Saito extern cl::opt<bool> EnableVPlanPredication;
294e4ecae0SHideki Saito 
30f2ec16ccSHideki Saito static cl::opt<bool>
31f2ec16ccSHideki Saito     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
32f2ec16ccSHideki Saito                        cl::desc("Enable if-conversion during vectorization."));
33f2ec16ccSHideki Saito 
34f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
35f2ec16ccSHideki Saito     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
36f2ec16ccSHideki Saito     cl::desc("The maximum allowed number of runtime memory checks with a "
37f2ec16ccSHideki Saito              "vectorize(enable) pragma."));
38f2ec16ccSHideki Saito 
39f2ec16ccSHideki Saito static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
40f2ec16ccSHideki Saito     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
41f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed."));
42f2ec16ccSHideki Saito 
43f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
44f2ec16ccSHideki Saito     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
45f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed with a "
46f2ec16ccSHideki Saito              "vectorize(enable) pragma"));
47f2ec16ccSHideki Saito 
48f2ec16ccSHideki Saito /// Maximum vectorization interleave count.
49f2ec16ccSHideki Saito static const unsigned MaxInterleaveFactor = 16;
50f2ec16ccSHideki Saito 
51f2ec16ccSHideki Saito namespace llvm {
52f2ec16ccSHideki Saito 
53f2ec16ccSHideki Saito bool LoopVectorizeHints::Hint::validate(unsigned Val) {
54f2ec16ccSHideki Saito   switch (Kind) {
55f2ec16ccSHideki Saito   case HK_WIDTH:
56f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
57f2ec16ccSHideki Saito   case HK_UNROLL:
58f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
59f2ec16ccSHideki Saito   case HK_FORCE:
60f2ec16ccSHideki Saito     return (Val <= 1);
61f2ec16ccSHideki Saito   case HK_ISVECTORIZED:
6220b198ecSSjoerd Meijer   case HK_PREDICATE:
63f2ec16ccSHideki Saito     return (Val == 0 || Val == 1);
64f2ec16ccSHideki Saito   }
65f2ec16ccSHideki Saito   return false;
66f2ec16ccSHideki Saito }
67f2ec16ccSHideki Saito 
68d4eb13c8SMichael Kruse LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
69d4eb13c8SMichael Kruse                                        bool InterleaveOnlyWhenForced,
70f2ec16ccSHideki Saito                                        OptimizationRemarkEmitter &ORE)
71f2ec16ccSHideki Saito     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
72d4eb13c8SMichael Kruse       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
73f2ec16ccSHideki Saito       Force("vectorize.enable", FK_Undefined, HK_FORCE),
7420b198ecSSjoerd Meijer       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
7520b198ecSSjoerd Meijer       Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
7620b198ecSSjoerd Meijer       ORE(ORE) {
77f2ec16ccSHideki Saito   // Populate values with existing loop metadata.
78f2ec16ccSHideki Saito   getHintsFromMetadata();
79f2ec16ccSHideki Saito 
80f2ec16ccSHideki Saito   // force-vector-interleave overrides DisableInterleaving.
81f2ec16ccSHideki Saito   if (VectorizerParams::isInterleaveForced())
82f2ec16ccSHideki Saito     Interleave.Value = VectorizerParams::VectorizationInterleave;
83f2ec16ccSHideki Saito 
84f2ec16ccSHideki Saito   if (IsVectorized.Value != 1)
85f2ec16ccSHideki Saito     // If the vectorization width and interleaving count are both 1 then
86f2ec16ccSHideki Saito     // consider the loop to have been already vectorized because there's
87f2ec16ccSHideki Saito     // nothing more that we can do.
88f2ec16ccSHideki Saito     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
89d4eb13c8SMichael Kruse   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
90f2ec16ccSHideki Saito              << "LV: Interleaving disabled by the pass manager\n");
91f2ec16ccSHideki Saito }
92f2ec16ccSHideki Saito 
9377a614a6SMichael Kruse void LoopVectorizeHints::setAlreadyVectorized() {
9477a614a6SMichael Kruse   LLVMContext &Context = TheLoop->getHeader()->getContext();
9577a614a6SMichael Kruse 
9677a614a6SMichael Kruse   MDNode *IsVectorizedMD = MDNode::get(
9777a614a6SMichael Kruse       Context,
9877a614a6SMichael Kruse       {MDString::get(Context, "llvm.loop.isvectorized"),
9977a614a6SMichael Kruse        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
10077a614a6SMichael Kruse   MDNode *LoopID = TheLoop->getLoopID();
10177a614a6SMichael Kruse   MDNode *NewLoopID =
10277a614a6SMichael Kruse       makePostTransformationMetadata(Context, LoopID,
10377a614a6SMichael Kruse                                      {Twine(Prefix(), "vectorize.").str(),
10477a614a6SMichael Kruse                                       Twine(Prefix(), "interleave.").str()},
10577a614a6SMichael Kruse                                      {IsVectorizedMD});
10677a614a6SMichael Kruse   TheLoop->setLoopID(NewLoopID);
10777a614a6SMichael Kruse 
10877a614a6SMichael Kruse   // Update internal cache.
10977a614a6SMichael Kruse   IsVectorized.Value = 1;
11077a614a6SMichael Kruse }
11177a614a6SMichael Kruse 
112d4eb13c8SMichael Kruse bool LoopVectorizeHints::allowVectorization(
113d4eb13c8SMichael Kruse     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
114f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled) {
115d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
116f2ec16ccSHideki Saito     emitRemarkWithHints();
117f2ec16ccSHideki Saito     return false;
118f2ec16ccSHideki Saito   }
119f2ec16ccSHideki Saito 
120d4eb13c8SMichael Kruse   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
121d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
122f2ec16ccSHideki Saito     emitRemarkWithHints();
123f2ec16ccSHideki Saito     return false;
124f2ec16ccSHideki Saito   }
125f2ec16ccSHideki Saito 
126f2ec16ccSHideki Saito   if (getIsVectorized() == 1) {
127d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
128f2ec16ccSHideki Saito     // FIXME: Add interleave.disable metadata. This will allow
129f2ec16ccSHideki Saito     // vectorize.disable to be used without disabling the pass and errors
130f2ec16ccSHideki Saito     // to differentiate between disabled vectorization and a width of 1.
131f2ec16ccSHideki Saito     ORE.emit([&]() {
132f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
133f2ec16ccSHideki Saito                                         "AllDisabled", L->getStartLoc(),
134f2ec16ccSHideki Saito                                         L->getHeader())
135f2ec16ccSHideki Saito              << "loop not vectorized: vectorization and interleaving are "
136f2ec16ccSHideki Saito                 "explicitly disabled, or the loop has already been "
137f2ec16ccSHideki Saito                 "vectorized";
138f2ec16ccSHideki Saito     });
139f2ec16ccSHideki Saito     return false;
140f2ec16ccSHideki Saito   }
141f2ec16ccSHideki Saito 
142f2ec16ccSHideki Saito   return true;
143f2ec16ccSHideki Saito }
144f2ec16ccSHideki Saito 
145f2ec16ccSHideki Saito void LoopVectorizeHints::emitRemarkWithHints() const {
146f2ec16ccSHideki Saito   using namespace ore;
147f2ec16ccSHideki Saito 
148f2ec16ccSHideki Saito   ORE.emit([&]() {
149f2ec16ccSHideki Saito     if (Force.Value == LoopVectorizeHints::FK_Disabled)
150f2ec16ccSHideki Saito       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
151f2ec16ccSHideki Saito                                       TheLoop->getStartLoc(),
152f2ec16ccSHideki Saito                                       TheLoop->getHeader())
153f2ec16ccSHideki Saito              << "loop not vectorized: vectorization is explicitly disabled";
154f2ec16ccSHideki Saito     else {
155f2ec16ccSHideki Saito       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
156f2ec16ccSHideki Saito                                  TheLoop->getStartLoc(), TheLoop->getHeader());
157f2ec16ccSHideki Saito       R << "loop not vectorized";
158f2ec16ccSHideki Saito       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
159f2ec16ccSHideki Saito         R << " (Force=" << NV("Force", true);
160f2ec16ccSHideki Saito         if (Width.Value != 0)
161f2ec16ccSHideki Saito           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
162f2ec16ccSHideki Saito         if (Interleave.Value != 0)
163f2ec16ccSHideki Saito           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
164f2ec16ccSHideki Saito         R << ")";
165f2ec16ccSHideki Saito       }
166f2ec16ccSHideki Saito       return R;
167f2ec16ccSHideki Saito     }
168f2ec16ccSHideki Saito   });
169f2ec16ccSHideki Saito }
170f2ec16ccSHideki Saito 
171f2ec16ccSHideki Saito const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
172f2ec16ccSHideki Saito   if (getWidth() == 1)
173f2ec16ccSHideki Saito     return LV_NAME;
174f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled)
175f2ec16ccSHideki Saito     return LV_NAME;
176f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
177f2ec16ccSHideki Saito     return LV_NAME;
178f2ec16ccSHideki Saito   return OptimizationRemarkAnalysis::AlwaysPrint;
179f2ec16ccSHideki Saito }
180f2ec16ccSHideki Saito 
181f2ec16ccSHideki Saito void LoopVectorizeHints::getHintsFromMetadata() {
182f2ec16ccSHideki Saito   MDNode *LoopID = TheLoop->getLoopID();
183f2ec16ccSHideki Saito   if (!LoopID)
184f2ec16ccSHideki Saito     return;
185f2ec16ccSHideki Saito 
186f2ec16ccSHideki Saito   // First operand should refer to the loop id itself.
187f2ec16ccSHideki Saito   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
188f2ec16ccSHideki Saito   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
189f2ec16ccSHideki Saito 
190f2ec16ccSHideki Saito   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
191f2ec16ccSHideki Saito     const MDString *S = nullptr;
192f2ec16ccSHideki Saito     SmallVector<Metadata *, 4> Args;
193f2ec16ccSHideki Saito 
194f2ec16ccSHideki Saito     // The expected hint is either a MDString or a MDNode with the first
195f2ec16ccSHideki Saito     // operand a MDString.
196f2ec16ccSHideki Saito     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
197f2ec16ccSHideki Saito       if (!MD || MD->getNumOperands() == 0)
198f2ec16ccSHideki Saito         continue;
199f2ec16ccSHideki Saito       S = dyn_cast<MDString>(MD->getOperand(0));
200f2ec16ccSHideki Saito       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
201f2ec16ccSHideki Saito         Args.push_back(MD->getOperand(i));
202f2ec16ccSHideki Saito     } else {
203f2ec16ccSHideki Saito       S = dyn_cast<MDString>(LoopID->getOperand(i));
204f2ec16ccSHideki Saito       assert(Args.size() == 0 && "too many arguments for MDString");
205f2ec16ccSHideki Saito     }
206f2ec16ccSHideki Saito 
207f2ec16ccSHideki Saito     if (!S)
208f2ec16ccSHideki Saito       continue;
209f2ec16ccSHideki Saito 
210f2ec16ccSHideki Saito     // Check if the hint starts with the loop metadata prefix.
211f2ec16ccSHideki Saito     StringRef Name = S->getString();
212f2ec16ccSHideki Saito     if (Args.size() == 1)
213f2ec16ccSHideki Saito       setHint(Name, Args[0]);
214f2ec16ccSHideki Saito   }
215f2ec16ccSHideki Saito }
216f2ec16ccSHideki Saito 
217f2ec16ccSHideki Saito void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
218f2ec16ccSHideki Saito   if (!Name.startswith(Prefix()))
219f2ec16ccSHideki Saito     return;
220f2ec16ccSHideki Saito   Name = Name.substr(Prefix().size(), StringRef::npos);
221f2ec16ccSHideki Saito 
222f2ec16ccSHideki Saito   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
223f2ec16ccSHideki Saito   if (!C)
224f2ec16ccSHideki Saito     return;
225f2ec16ccSHideki Saito   unsigned Val = C->getZExtValue();
226f2ec16ccSHideki Saito 
22720b198ecSSjoerd Meijer   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
228f2ec16ccSHideki Saito   for (auto H : Hints) {
229f2ec16ccSHideki Saito     if (Name == H->Name) {
230f2ec16ccSHideki Saito       if (H->validate(Val))
231f2ec16ccSHideki Saito         H->Value = Val;
232f2ec16ccSHideki Saito       else
233d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
234f2ec16ccSHideki Saito       break;
235f2ec16ccSHideki Saito     }
236f2ec16ccSHideki Saito   }
237f2ec16ccSHideki Saito }
238f2ec16ccSHideki Saito 
239f2ec16ccSHideki Saito bool LoopVectorizationRequirements::doesNotMeet(
240f2ec16ccSHideki Saito     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
241f2ec16ccSHideki Saito   const char *PassName = Hints.vectorizeAnalysisPassName();
242f2ec16ccSHideki Saito   bool Failed = false;
243f2ec16ccSHideki Saito   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
244f2ec16ccSHideki Saito     ORE.emit([&]() {
245f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisFPCommute(
246f2ec16ccSHideki Saito                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
247f2ec16ccSHideki Saito                  UnsafeAlgebraInst->getParent())
248f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
249f2ec16ccSHideki Saito                 "floating-point operations";
250f2ec16ccSHideki Saito     });
251f2ec16ccSHideki Saito     Failed = true;
252f2ec16ccSHideki Saito   }
253f2ec16ccSHideki Saito 
254f2ec16ccSHideki Saito   // Test if runtime memcheck thresholds are exceeded.
255f2ec16ccSHideki Saito   bool PragmaThresholdReached =
256f2ec16ccSHideki Saito       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
257f2ec16ccSHideki Saito   bool ThresholdReached =
258f2ec16ccSHideki Saito       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
259f2ec16ccSHideki Saito   if ((ThresholdReached && !Hints.allowReordering()) ||
260f2ec16ccSHideki Saito       PragmaThresholdReached) {
261f2ec16ccSHideki Saito     ORE.emit([&]() {
262f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
263f2ec16ccSHideki Saito                                                 L->getStartLoc(),
264f2ec16ccSHideki Saito                                                 L->getHeader())
265f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
266f2ec16ccSHideki Saito                 "memory operations";
267f2ec16ccSHideki Saito     });
268d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
269f2ec16ccSHideki Saito     Failed = true;
270f2ec16ccSHideki Saito   }
271f2ec16ccSHideki Saito 
272f2ec16ccSHideki Saito   return Failed;
273f2ec16ccSHideki Saito }
274f2ec16ccSHideki Saito 
275f2ec16ccSHideki Saito // Return true if the inner loop \p Lp is uniform with regard to the outer loop
276f2ec16ccSHideki Saito // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
277f2ec16ccSHideki Saito // executing the inner loop will execute the same iterations). This check is
278f2ec16ccSHideki Saito // very constrained for now but it will be relaxed in the future. \p Lp is
279f2ec16ccSHideki Saito // considered uniform if it meets all the following conditions:
280f2ec16ccSHideki Saito //   1) it has a canonical IV (starting from 0 and with stride 1),
281f2ec16ccSHideki Saito //   2) its latch terminator is a conditional branch and,
282f2ec16ccSHideki Saito //   3) its latch condition is a compare instruction whose operands are the
283f2ec16ccSHideki Saito //      canonical IV and an OuterLp invariant.
284f2ec16ccSHideki Saito // This check doesn't take into account the uniformity of other conditions not
285f2ec16ccSHideki Saito // related to the loop latch because they don't affect the loop uniformity.
286f2ec16ccSHideki Saito //
287f2ec16ccSHideki Saito // NOTE: We decided to keep all these checks and its associated documentation
288f2ec16ccSHideki Saito // together so that we can easily have a picture of the current supported loop
289f2ec16ccSHideki Saito // nests. However, some of the current checks don't depend on \p OuterLp and
290f2ec16ccSHideki Saito // would be redundantly executed for each \p Lp if we invoked this function for
291f2ec16ccSHideki Saito // different candidate outer loops. This is not the case for now because we
292f2ec16ccSHideki Saito // don't currently have the infrastructure to evaluate multiple candidate outer
293f2ec16ccSHideki Saito // loops and \p OuterLp will be a fixed parameter while we only support explicit
294f2ec16ccSHideki Saito // outer loop vectorization. It's also very likely that these checks go away
295f2ec16ccSHideki Saito // before introducing the aforementioned infrastructure. However, if this is not
296f2ec16ccSHideki Saito // the case, we should move the \p OuterLp independent checks to a separate
297f2ec16ccSHideki Saito // function that is only executed once for each \p Lp.
298f2ec16ccSHideki Saito static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
299f2ec16ccSHideki Saito   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
300f2ec16ccSHideki Saito 
301f2ec16ccSHideki Saito   // If Lp is the outer loop, it's uniform by definition.
302f2ec16ccSHideki Saito   if (Lp == OuterLp)
303f2ec16ccSHideki Saito     return true;
304f2ec16ccSHideki Saito   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
305f2ec16ccSHideki Saito 
306f2ec16ccSHideki Saito   // 1.
307f2ec16ccSHideki Saito   PHINode *IV = Lp->getCanonicalInductionVariable();
308f2ec16ccSHideki Saito   if (!IV) {
309d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
310f2ec16ccSHideki Saito     return false;
311f2ec16ccSHideki Saito   }
312f2ec16ccSHideki Saito 
313f2ec16ccSHideki Saito   // 2.
314f2ec16ccSHideki Saito   BasicBlock *Latch = Lp->getLoopLatch();
315f2ec16ccSHideki Saito   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
316f2ec16ccSHideki Saito   if (!LatchBr || LatchBr->isUnconditional()) {
317d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
318f2ec16ccSHideki Saito     return false;
319f2ec16ccSHideki Saito   }
320f2ec16ccSHideki Saito 
321f2ec16ccSHideki Saito   // 3.
322f2ec16ccSHideki Saito   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
323f2ec16ccSHideki Saito   if (!LatchCmp) {
324d34e60caSNicola Zaghen     LLVM_DEBUG(
325d34e60caSNicola Zaghen         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
326f2ec16ccSHideki Saito     return false;
327f2ec16ccSHideki Saito   }
328f2ec16ccSHideki Saito 
329f2ec16ccSHideki Saito   Value *CondOp0 = LatchCmp->getOperand(0);
330f2ec16ccSHideki Saito   Value *CondOp1 = LatchCmp->getOperand(1);
331f2ec16ccSHideki Saito   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
332f2ec16ccSHideki Saito   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
333f2ec16ccSHideki Saito       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
334d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
335f2ec16ccSHideki Saito     return false;
336f2ec16ccSHideki Saito   }
337f2ec16ccSHideki Saito 
338f2ec16ccSHideki Saito   return true;
339f2ec16ccSHideki Saito }
340f2ec16ccSHideki Saito 
341f2ec16ccSHideki Saito // Return true if \p Lp and all its nested loops are uniform with regard to \p
342f2ec16ccSHideki Saito // OuterLp.
343f2ec16ccSHideki Saito static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
344f2ec16ccSHideki Saito   if (!isUniformLoop(Lp, OuterLp))
345f2ec16ccSHideki Saito     return false;
346f2ec16ccSHideki Saito 
347f2ec16ccSHideki Saito   // Check if nested loops are uniform.
348f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
349f2ec16ccSHideki Saito     if (!isUniformLoopNest(SubLp, OuterLp))
350f2ec16ccSHideki Saito       return false;
351f2ec16ccSHideki Saito 
352f2ec16ccSHideki Saito   return true;
353f2ec16ccSHideki Saito }
354f2ec16ccSHideki Saito 
3555f8f34e4SAdrian Prantl /// Check whether it is safe to if-convert this phi node.
356f2ec16ccSHideki Saito ///
357f2ec16ccSHideki Saito /// Phi nodes with constant expressions that can trap are not safe to if
358f2ec16ccSHideki Saito /// convert.
359f2ec16ccSHideki Saito static bool canIfConvertPHINodes(BasicBlock *BB) {
360f2ec16ccSHideki Saito   for (PHINode &Phi : BB->phis()) {
361f2ec16ccSHideki Saito     for (Value *V : Phi.incoming_values())
362f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(V))
363f2ec16ccSHideki Saito         if (C->canTrap())
364f2ec16ccSHideki Saito           return false;
365f2ec16ccSHideki Saito   }
366f2ec16ccSHideki Saito   return true;
367f2ec16ccSHideki Saito }
368f2ec16ccSHideki Saito 
369f2ec16ccSHideki Saito static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
370f2ec16ccSHideki Saito   if (Ty->isPointerTy())
371f2ec16ccSHideki Saito     return DL.getIntPtrType(Ty);
372f2ec16ccSHideki Saito 
373f2ec16ccSHideki Saito   // It is possible that char's or short's overflow when we ask for the loop's
374f2ec16ccSHideki Saito   // trip count, work around this by changing the type size.
375f2ec16ccSHideki Saito   if (Ty->getScalarSizeInBits() < 32)
376f2ec16ccSHideki Saito     return Type::getInt32Ty(Ty->getContext());
377f2ec16ccSHideki Saito 
378f2ec16ccSHideki Saito   return Ty;
379f2ec16ccSHideki Saito }
380f2ec16ccSHideki Saito 
381f2ec16ccSHideki Saito static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
382f2ec16ccSHideki Saito   Ty0 = convertPointerToIntegerType(DL, Ty0);
383f2ec16ccSHideki Saito   Ty1 = convertPointerToIntegerType(DL, Ty1);
384f2ec16ccSHideki Saito   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
385f2ec16ccSHideki Saito     return Ty0;
386f2ec16ccSHideki Saito   return Ty1;
387f2ec16ccSHideki Saito }
388f2ec16ccSHideki Saito 
3895f8f34e4SAdrian Prantl /// Check that the instruction has outside loop users and is not an
390f2ec16ccSHideki Saito /// identified reduction variable.
391f2ec16ccSHideki Saito static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
392f2ec16ccSHideki Saito                                SmallPtrSetImpl<Value *> &AllowedExit) {
39360a1e4ddSAnna Thomas   // Reductions, Inductions and non-header phis are allowed to have exit users. All
394f2ec16ccSHideki Saito   // other instructions must not have external users.
395f2ec16ccSHideki Saito   if (!AllowedExit.count(Inst))
396f2ec16ccSHideki Saito     // Check that all of the users of the loop are inside the BB.
397f2ec16ccSHideki Saito     for (User *U : Inst->users()) {
398f2ec16ccSHideki Saito       Instruction *UI = cast<Instruction>(U);
399f2ec16ccSHideki Saito       // This user may be a reduction exit value.
400f2ec16ccSHideki Saito       if (!TheLoop->contains(UI)) {
401d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
402f2ec16ccSHideki Saito         return true;
403f2ec16ccSHideki Saito       }
404f2ec16ccSHideki Saito     }
405f2ec16ccSHideki Saito   return false;
406f2ec16ccSHideki Saito }
407f2ec16ccSHideki Saito 
408f2ec16ccSHideki Saito int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
409f2ec16ccSHideki Saito   const ValueToValueMap &Strides =
410f2ec16ccSHideki Saito       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
411f2ec16ccSHideki Saito 
412f2ec16ccSHideki Saito   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
413f2ec16ccSHideki Saito   if (Stride == 1 || Stride == -1)
414f2ec16ccSHideki Saito     return Stride;
415f2ec16ccSHideki Saito   return 0;
416f2ec16ccSHideki Saito }
417f2ec16ccSHideki Saito 
418f2ec16ccSHideki Saito bool LoopVectorizationLegality::isUniform(Value *V) {
419f2ec16ccSHideki Saito   return LAI->isUniform(V);
420f2ec16ccSHideki Saito }
421f2ec16ccSHideki Saito 
422f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeOuterLoop() {
423f2ec16ccSHideki Saito   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
424f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
425f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
426f2ec16ccSHideki Saito   bool Result = true;
427f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
428f2ec16ccSHideki Saito 
429f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
430f2ec16ccSHideki Saito     // Check whether the BB terminator is a BranchInst. Any other terminator is
431f2ec16ccSHideki Saito     // not supported yet.
432f2ec16ccSHideki Saito     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
433f2ec16ccSHideki Saito     if (!Br) {
4349e97caf5SRenato Golin       reportVectorizationFailure("Unsupported basic block terminator",
4359e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
436ec818d7fSHideki Saito           "CFGNotUnderstood", ORE, TheLoop);
437f2ec16ccSHideki Saito       if (DoExtraAnalysis)
438f2ec16ccSHideki Saito         Result = false;
439f2ec16ccSHideki Saito       else
440f2ec16ccSHideki Saito         return false;
441f2ec16ccSHideki Saito     }
442f2ec16ccSHideki Saito 
443f2ec16ccSHideki Saito     // Check whether the BranchInst is a supported one. Only unconditional
444f2ec16ccSHideki Saito     // branches, conditional branches with an outer loop invariant condition or
445f2ec16ccSHideki Saito     // backedges are supported.
4464e4ecae0SHideki Saito     // FIXME: We skip these checks when VPlan predication is enabled as we
4474e4ecae0SHideki Saito     // want to allow divergent branches. This whole check will be removed
4484e4ecae0SHideki Saito     // once VPlan predication is on by default.
4494e4ecae0SHideki Saito     if (!EnableVPlanPredication && Br && Br->isConditional() &&
450f2ec16ccSHideki Saito         !TheLoop->isLoopInvariant(Br->getCondition()) &&
451f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(0)) &&
452f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(1))) {
4539e97caf5SRenato Golin       reportVectorizationFailure("Unsupported conditional branch",
4549e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
455ec818d7fSHideki Saito           "CFGNotUnderstood", ORE, TheLoop);
456f2ec16ccSHideki Saito       if (DoExtraAnalysis)
457f2ec16ccSHideki Saito         Result = false;
458f2ec16ccSHideki Saito       else
459f2ec16ccSHideki Saito         return false;
460f2ec16ccSHideki Saito     }
461f2ec16ccSHideki Saito   }
462f2ec16ccSHideki Saito 
463f2ec16ccSHideki Saito   // Check whether inner loops are uniform. At this point, we only support
464f2ec16ccSHideki Saito   // simple outer loops scenarios with uniform nested loops.
465f2ec16ccSHideki Saito   if (!isUniformLoopNest(TheLoop /*loop nest*/,
466f2ec16ccSHideki Saito                          TheLoop /*context outer loop*/)) {
4679e97caf5SRenato Golin     reportVectorizationFailure("Outer loop contains divergent loops",
4689e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
469ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
470f2ec16ccSHideki Saito     if (DoExtraAnalysis)
471f2ec16ccSHideki Saito       Result = false;
472f2ec16ccSHideki Saito     else
473f2ec16ccSHideki Saito       return false;
474f2ec16ccSHideki Saito   }
475f2ec16ccSHideki Saito 
476ea7f3035SHideki Saito   // Check whether we are able to set up outer loop induction.
477ea7f3035SHideki Saito   if (!setupOuterLoopInductions()) {
4789e97caf5SRenato Golin     reportVectorizationFailure("Unsupported outer loop Phi(s)",
4799e97caf5SRenato Golin                                "Unsupported outer loop Phi(s)",
480ec818d7fSHideki Saito                                "UnsupportedPhi", ORE, TheLoop);
481ea7f3035SHideki Saito     if (DoExtraAnalysis)
482ea7f3035SHideki Saito       Result = false;
483ea7f3035SHideki Saito     else
484ea7f3035SHideki Saito       return false;
485ea7f3035SHideki Saito   }
486ea7f3035SHideki Saito 
487f2ec16ccSHideki Saito   return Result;
488f2ec16ccSHideki Saito }
489f2ec16ccSHideki Saito 
490f2ec16ccSHideki Saito void LoopVectorizationLegality::addInductionPhi(
491f2ec16ccSHideki Saito     PHINode *Phi, const InductionDescriptor &ID,
492f2ec16ccSHideki Saito     SmallPtrSetImpl<Value *> &AllowedExit) {
493f2ec16ccSHideki Saito   Inductions[Phi] = ID;
494f2ec16ccSHideki Saito 
495f2ec16ccSHideki Saito   // In case this induction also comes with casts that we know we can ignore
496f2ec16ccSHideki Saito   // in the vectorized loop body, record them here. All casts could be recorded
497f2ec16ccSHideki Saito   // here for ignoring, but suffices to record only the first (as it is the
498f2ec16ccSHideki Saito   // only one that may bw used outside the cast sequence).
499f2ec16ccSHideki Saito   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
500f2ec16ccSHideki Saito   if (!Casts.empty())
501f2ec16ccSHideki Saito     InductionCastsToIgnore.insert(*Casts.begin());
502f2ec16ccSHideki Saito 
503f2ec16ccSHideki Saito   Type *PhiTy = Phi->getType();
504f2ec16ccSHideki Saito   const DataLayout &DL = Phi->getModule()->getDataLayout();
505f2ec16ccSHideki Saito 
506f2ec16ccSHideki Saito   // Get the widest type.
507f2ec16ccSHideki Saito   if (!PhiTy->isFloatingPointTy()) {
508f2ec16ccSHideki Saito     if (!WidestIndTy)
509f2ec16ccSHideki Saito       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
510f2ec16ccSHideki Saito     else
511f2ec16ccSHideki Saito       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
512f2ec16ccSHideki Saito   }
513f2ec16ccSHideki Saito 
514f2ec16ccSHideki Saito   // Int inductions are special because we only allow one IV.
515f2ec16ccSHideki Saito   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
516f2ec16ccSHideki Saito       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
517f2ec16ccSHideki Saito       isa<Constant>(ID.getStartValue()) &&
518f2ec16ccSHideki Saito       cast<Constant>(ID.getStartValue())->isNullValue()) {
519f2ec16ccSHideki Saito 
520f2ec16ccSHideki Saito     // Use the phi node with the widest type as induction. Use the last
521f2ec16ccSHideki Saito     // one if there are multiple (no good reason for doing this other
522f2ec16ccSHideki Saito     // than it is expedient). We've checked that it begins at zero and
523f2ec16ccSHideki Saito     // steps by one, so this is a canonical induction variable.
524f2ec16ccSHideki Saito     if (!PrimaryInduction || PhiTy == WidestIndTy)
525f2ec16ccSHideki Saito       PrimaryInduction = Phi;
526f2ec16ccSHideki Saito   }
527f2ec16ccSHideki Saito 
528f2ec16ccSHideki Saito   // Both the PHI node itself, and the "post-increment" value feeding
529f2ec16ccSHideki Saito   // back into the PHI node may have external users.
530f2ec16ccSHideki Saito   // We can allow those uses, except if the SCEVs we have for them rely
531f2ec16ccSHideki Saito   // on predicates that only hold within the loop, since allowing the exit
5326a1dd77fSAnna Thomas   // currently means re-using this SCEV outside the loop (see PR33706 for more
5336a1dd77fSAnna Thomas   // details).
534f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().isAlwaysTrue()) {
535f2ec16ccSHideki Saito     AllowedExit.insert(Phi);
536f2ec16ccSHideki Saito     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
537f2ec16ccSHideki Saito   }
538f2ec16ccSHideki Saito 
539d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
540f2ec16ccSHideki Saito }
541f2ec16ccSHideki Saito 
542ea7f3035SHideki Saito bool LoopVectorizationLegality::setupOuterLoopInductions() {
543ea7f3035SHideki Saito   BasicBlock *Header = TheLoop->getHeader();
544ea7f3035SHideki Saito 
545ea7f3035SHideki Saito   // Returns true if a given Phi is a supported induction.
546ea7f3035SHideki Saito   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
547ea7f3035SHideki Saito     InductionDescriptor ID;
548ea7f3035SHideki Saito     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
549ea7f3035SHideki Saito         ID.getKind() == InductionDescriptor::IK_IntInduction) {
550ea7f3035SHideki Saito       addInductionPhi(&Phi, ID, AllowedExit);
551ea7f3035SHideki Saito       return true;
552ea7f3035SHideki Saito     } else {
553ea7f3035SHideki Saito       // Bail out for any Phi in the outer loop header that is not a supported
554ea7f3035SHideki Saito       // induction.
555ea7f3035SHideki Saito       LLVM_DEBUG(
556ea7f3035SHideki Saito           dbgs()
557ea7f3035SHideki Saito           << "LV: Found unsupported PHI for outer loop vectorization.\n");
558ea7f3035SHideki Saito       return false;
559ea7f3035SHideki Saito     }
560ea7f3035SHideki Saito   };
561ea7f3035SHideki Saito 
562ea7f3035SHideki Saito   if (llvm::all_of(Header->phis(), isSupportedPhi))
563ea7f3035SHideki Saito     return true;
564ea7f3035SHideki Saito   else
565ea7f3035SHideki Saito     return false;
566ea7f3035SHideki Saito }
567ea7f3035SHideki Saito 
568f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeInstrs() {
569f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
570f2ec16ccSHideki Saito 
571f2ec16ccSHideki Saito   // Look for the attribute signaling the absence of NaNs.
572f2ec16ccSHideki Saito   Function &F = *Header->getParent();
573f2ec16ccSHideki Saito   HasFunNoNaNAttr =
574f2ec16ccSHideki Saito       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
575f2ec16ccSHideki Saito 
576f2ec16ccSHideki Saito   // For each block in the loop.
577f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
578f2ec16ccSHideki Saito     // Scan the instructions in the block and look for hazards.
579f2ec16ccSHideki Saito     for (Instruction &I : *BB) {
580f2ec16ccSHideki Saito       if (auto *Phi = dyn_cast<PHINode>(&I)) {
581f2ec16ccSHideki Saito         Type *PhiTy = Phi->getType();
582f2ec16ccSHideki Saito         // Check that this PHI type is allowed.
583f2ec16ccSHideki Saito         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
584f2ec16ccSHideki Saito             !PhiTy->isPointerTy()) {
5859e97caf5SRenato Golin           reportVectorizationFailure("Found a non-int non-pointer PHI",
5869e97caf5SRenato Golin                                      "loop control flow is not understood by vectorizer",
587ec818d7fSHideki Saito                                      "CFGNotUnderstood", ORE, TheLoop);
588f2ec16ccSHideki Saito           return false;
589f2ec16ccSHideki Saito         }
590f2ec16ccSHideki Saito 
591f2ec16ccSHideki Saito         // If this PHINode is not in the header block, then we know that we
592f2ec16ccSHideki Saito         // can convert it to select during if-conversion. No need to check if
593f2ec16ccSHideki Saito         // the PHIs in this block are induction or reduction variables.
594f2ec16ccSHideki Saito         if (BB != Header) {
59560a1e4ddSAnna Thomas           // Non-header phi nodes that have outside uses can be vectorized. Add
59660a1e4ddSAnna Thomas           // them to the list of allowed exits.
59760a1e4ddSAnna Thomas           // Unsafe cyclic dependencies with header phis are identified during
59860a1e4ddSAnna Thomas           // legalization for reduction, induction and first order
59960a1e4ddSAnna Thomas           // recurrences.
600dd18ce45SBjorn Pettersson           AllowedExit.insert(&I);
601f2ec16ccSHideki Saito           continue;
602f2ec16ccSHideki Saito         }
603f2ec16ccSHideki Saito 
604f2ec16ccSHideki Saito         // We only allow if-converted PHIs with exactly two incoming values.
605f2ec16ccSHideki Saito         if (Phi->getNumIncomingValues() != 2) {
6069e97caf5SRenato Golin           reportVectorizationFailure("Found an invalid PHI",
6079e97caf5SRenato Golin               "loop control flow is not understood by vectorizer",
608ec818d7fSHideki Saito               "CFGNotUnderstood", ORE, TheLoop, Phi);
609f2ec16ccSHideki Saito           return false;
610f2ec16ccSHideki Saito         }
611f2ec16ccSHideki Saito 
612f2ec16ccSHideki Saito         RecurrenceDescriptor RedDes;
613f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
614f2ec16ccSHideki Saito                                                  DT)) {
615f2ec16ccSHideki Saito           if (RedDes.hasUnsafeAlgebra())
616f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
617f2ec16ccSHideki Saito           AllowedExit.insert(RedDes.getLoopExitInstr());
618f2ec16ccSHideki Saito           Reductions[Phi] = RedDes;
619f2ec16ccSHideki Saito           continue;
620f2ec16ccSHideki Saito         }
621f2ec16ccSHideki Saito 
622b02b0ad8SAnna Thomas         // TODO: Instead of recording the AllowedExit, it would be good to record the
623b02b0ad8SAnna Thomas         // complementary set: NotAllowedExit. These include (but may not be
624b02b0ad8SAnna Thomas         // limited to):
625b02b0ad8SAnna Thomas         // 1. Reduction phis as they represent the one-before-last value, which
626b02b0ad8SAnna Thomas         // is not available when vectorized
627b02b0ad8SAnna Thomas         // 2. Induction phis and increment when SCEV predicates cannot be used
628b02b0ad8SAnna Thomas         // outside the loop - see addInductionPhi
629b02b0ad8SAnna Thomas         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
630b02b0ad8SAnna Thomas         // outside the loop - see call to hasOutsideLoopUser in the non-phi
631b02b0ad8SAnna Thomas         // handling below
632b02b0ad8SAnna Thomas         // 4. FirstOrderRecurrence phis that can possibly be handled by
633b02b0ad8SAnna Thomas         // extraction.
634b02b0ad8SAnna Thomas         // By recording these, we can then reason about ways to vectorize each
635b02b0ad8SAnna Thomas         // of these NotAllowedExit.
636f2ec16ccSHideki Saito         InductionDescriptor ID;
637f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
638f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
639f2ec16ccSHideki Saito           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
640f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
641f2ec16ccSHideki Saito           continue;
642f2ec16ccSHideki Saito         }
643f2ec16ccSHideki Saito 
644f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
645f2ec16ccSHideki Saito                                                          SinkAfter, DT)) {
646f2ec16ccSHideki Saito           FirstOrderRecurrences.insert(Phi);
647f2ec16ccSHideki Saito           continue;
648f2ec16ccSHideki Saito         }
649f2ec16ccSHideki Saito 
650f2ec16ccSHideki Saito         // As a last resort, coerce the PHI to a AddRec expression
651f2ec16ccSHideki Saito         // and re-try classifying it a an induction PHI.
652f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
653f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
654f2ec16ccSHideki Saito           continue;
655f2ec16ccSHideki Saito         }
656f2ec16ccSHideki Saito 
6579e97caf5SRenato Golin         reportVectorizationFailure("Found an unidentified PHI",
6589e97caf5SRenato Golin             "value that could not be identified as "
6599e97caf5SRenato Golin             "reduction is used outside the loop",
660ec818d7fSHideki Saito             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
661f2ec16ccSHideki Saito         return false;
662f2ec16ccSHideki Saito       } // end of PHI handling
663f2ec16ccSHideki Saito 
664f2ec16ccSHideki Saito       // We handle calls that:
665f2ec16ccSHideki Saito       //   * Are debug info intrinsics.
666f2ec16ccSHideki Saito       //   * Have a mapping to an IR intrinsic.
667f2ec16ccSHideki Saito       //   * Have a vector version available.
668f2ec16ccSHideki Saito       auto *CI = dyn_cast<CallInst>(&I);
669f2ec16ccSHideki Saito       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
670f2ec16ccSHideki Saito           !isa<DbgInfoIntrinsic>(CI) &&
671f2ec16ccSHideki Saito           !(CI->getCalledFunction() && TLI &&
672f2ec16ccSHideki Saito             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
6737d65fe5cSSanjay Patel         // If the call is a recognized math libary call, it is likely that
6747d65fe5cSSanjay Patel         // we can vectorize it given loosened floating-point constraints.
6757d65fe5cSSanjay Patel         LibFunc Func;
6767d65fe5cSSanjay Patel         bool IsMathLibCall =
6777d65fe5cSSanjay Patel             TLI && CI->getCalledFunction() &&
6787d65fe5cSSanjay Patel             CI->getType()->isFloatingPointTy() &&
6797d65fe5cSSanjay Patel             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
6807d65fe5cSSanjay Patel             TLI->hasOptimizedCodeGen(Func);
6817d65fe5cSSanjay Patel 
6827d65fe5cSSanjay Patel         if (IsMathLibCall) {
6837d65fe5cSSanjay Patel           // TODO: Ideally, we should not use clang-specific language here,
6847d65fe5cSSanjay Patel           // but it's hard to provide meaningful yet generic advice.
6857d65fe5cSSanjay Patel           // Also, should this be guarded by allowExtraAnalysis() and/or be part
6867d65fe5cSSanjay Patel           // of the returned info from isFunctionVectorizable()?
6879e97caf5SRenato Golin           reportVectorizationFailure("Found a non-intrinsic callsite",
6889e97caf5SRenato Golin               "library call cannot be vectorized. "
6897d65fe5cSSanjay Patel               "Try compiling with -fno-math-errno, -ffast-math, "
6909e97caf5SRenato Golin               "or similar flags",
691ec818d7fSHideki Saito               "CantVectorizeLibcall", ORE, TheLoop, CI);
6927d65fe5cSSanjay Patel         } else {
6939e97caf5SRenato Golin           reportVectorizationFailure("Found a non-intrinsic callsite",
6949e97caf5SRenato Golin                                      "call instruction cannot be vectorized",
695ec818d7fSHideki Saito                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
6967d65fe5cSSanjay Patel         }
697f2ec16ccSHideki Saito         return false;
698f2ec16ccSHideki Saito       }
699f2ec16ccSHideki Saito 
700a066f1f9SSimon Pilgrim       // Some intrinsics have scalar arguments and should be same in order for
701a066f1f9SSimon Pilgrim       // them to be vectorized (i.e. loop invariant).
702a066f1f9SSimon Pilgrim       if (CI) {
703f2ec16ccSHideki Saito         auto *SE = PSE.getSE();
704a066f1f9SSimon Pilgrim         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
705a066f1f9SSimon Pilgrim         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
706a066f1f9SSimon Pilgrim           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
707a066f1f9SSimon Pilgrim             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
7089e97caf5SRenato Golin               reportVectorizationFailure("Found unvectorizable intrinsic",
7099e97caf5SRenato Golin                   "intrinsic instruction cannot be vectorized",
710ec818d7fSHideki Saito                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
711f2ec16ccSHideki Saito               return false;
712f2ec16ccSHideki Saito             }
713f2ec16ccSHideki Saito           }
714a066f1f9SSimon Pilgrim       }
715f2ec16ccSHideki Saito 
716f2ec16ccSHideki Saito       // Check that the instruction return type is vectorizable.
717f2ec16ccSHideki Saito       // Also, we can't vectorize extractelement instructions.
718f2ec16ccSHideki Saito       if ((!VectorType::isValidElementType(I.getType()) &&
719f2ec16ccSHideki Saito            !I.getType()->isVoidTy()) ||
720f2ec16ccSHideki Saito           isa<ExtractElementInst>(I)) {
7219e97caf5SRenato Golin         reportVectorizationFailure("Found unvectorizable type",
7229e97caf5SRenato Golin             "instruction return type cannot be vectorized",
723ec818d7fSHideki Saito             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
724f2ec16ccSHideki Saito         return false;
725f2ec16ccSHideki Saito       }
726f2ec16ccSHideki Saito 
727f2ec16ccSHideki Saito       // Check that the stored type is vectorizable.
728f2ec16ccSHideki Saito       if (auto *ST = dyn_cast<StoreInst>(&I)) {
729f2ec16ccSHideki Saito         Type *T = ST->getValueOperand()->getType();
730f2ec16ccSHideki Saito         if (!VectorType::isValidElementType(T)) {
7319e97caf5SRenato Golin           reportVectorizationFailure("Store instruction cannot be vectorized",
7329e97caf5SRenato Golin                                      "store instruction cannot be vectorized",
733ec818d7fSHideki Saito                                      "CantVectorizeStore", ORE, TheLoop, ST);
734f2ec16ccSHideki Saito           return false;
735f2ec16ccSHideki Saito         }
736f2ec16ccSHideki Saito 
7376452bdd2SWarren Ristow         // For nontemporal stores, check that a nontemporal vector version is
7386452bdd2SWarren Ristow         // supported on the target.
7396452bdd2SWarren Ristow         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
7406452bdd2SWarren Ristow           // Arbitrarily try a vector of 2 elements.
7416452bdd2SWarren Ristow           Type *VecTy = VectorType::get(T, /*NumElements=*/2);
7426452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of stored type");
7436452bdd2SWarren Ristow           unsigned Alignment = getLoadStoreAlignment(ST);
74433671cefSGuillaume Chatelet           assert(Alignment && "Alignment should be set");
745*18f805a7SGuillaume Chatelet           if (!TTI->isLegalNTStore(VecTy, Align(Alignment))) {
7466452bdd2SWarren Ristow             reportVectorizationFailure(
7476452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
7486452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
749ec818d7fSHideki Saito                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
7506452bdd2SWarren Ristow             return false;
7516452bdd2SWarren Ristow           }
7526452bdd2SWarren Ristow         }
7536452bdd2SWarren Ristow 
7546452bdd2SWarren Ristow       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
7556452bdd2SWarren Ristow         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
7566452bdd2SWarren Ristow           // For nontemporal loads, check that a nontemporal vector version is
7576452bdd2SWarren Ristow           // supported on the target (arbitrarily try a vector of 2 elements).
7586452bdd2SWarren Ristow           Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
7596452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of load type");
7606452bdd2SWarren Ristow           unsigned Alignment = getLoadStoreAlignment(LD);
76133671cefSGuillaume Chatelet           assert(Alignment && "Alignment should be set");
762*18f805a7SGuillaume Chatelet           if (!TTI->isLegalNTLoad(VecTy, Align(Alignment))) {
7636452bdd2SWarren Ristow             reportVectorizationFailure(
7646452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
7656452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
766ec818d7fSHideki Saito                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
7676452bdd2SWarren Ristow             return false;
7686452bdd2SWarren Ristow           }
7696452bdd2SWarren Ristow         }
7706452bdd2SWarren Ristow 
771f2ec16ccSHideki Saito         // FP instructions can allow unsafe algebra, thus vectorizable by
772f2ec16ccSHideki Saito         // non-IEEE-754 compliant SIMD units.
773f2ec16ccSHideki Saito         // This applies to floating-point math operations and calls, not memory
774f2ec16ccSHideki Saito         // operations, shuffles, or casts, as they don't change precision or
775f2ec16ccSHideki Saito         // semantics.
776f2ec16ccSHideki Saito       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
777f2ec16ccSHideki Saito                  !I.isFast()) {
778d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
779f2ec16ccSHideki Saito         Hints->setPotentiallyUnsafe();
780f2ec16ccSHideki Saito       }
781f2ec16ccSHideki Saito 
782f2ec16ccSHideki Saito       // Reduction instructions are allowed to have exit users.
783f2ec16ccSHideki Saito       // All other instructions must not have external users.
784f2ec16ccSHideki Saito       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
785b02b0ad8SAnna Thomas         // We can safely vectorize loops where instructions within the loop are
786b02b0ad8SAnna Thomas         // used outside the loop only if the SCEV predicates within the loop is
787b02b0ad8SAnna Thomas         // same as outside the loop. Allowing the exit means reusing the SCEV
788b02b0ad8SAnna Thomas         // outside the loop.
789b02b0ad8SAnna Thomas         if (PSE.getUnionPredicate().isAlwaysTrue()) {
790b02b0ad8SAnna Thomas           AllowedExit.insert(&I);
791b02b0ad8SAnna Thomas           continue;
792b02b0ad8SAnna Thomas         }
7939e97caf5SRenato Golin         reportVectorizationFailure("Value cannot be used outside the loop",
7949e97caf5SRenato Golin                                    "value cannot be used outside the loop",
795ec818d7fSHideki Saito                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
796f2ec16ccSHideki Saito         return false;
797f2ec16ccSHideki Saito       }
798f2ec16ccSHideki Saito     } // next instr.
799f2ec16ccSHideki Saito   }
800f2ec16ccSHideki Saito 
801f2ec16ccSHideki Saito   if (!PrimaryInduction) {
802f2ec16ccSHideki Saito     if (Inductions.empty()) {
8039e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8049e97caf5SRenato Golin           "loop induction variable could not be identified",
805ec818d7fSHideki Saito           "NoInductionVariable", ORE, TheLoop);
806f2ec16ccSHideki Saito       return false;
8074f27730eSWarren Ristow     } else if (!WidestIndTy) {
8089e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8099e97caf5SRenato Golin           "integer loop induction variable could not be identified",
810ec818d7fSHideki Saito           "NoIntegerInductionVariable", ORE, TheLoop);
8114f27730eSWarren Ristow       return false;
8129e97caf5SRenato Golin     } else {
8139e97caf5SRenato Golin       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
814f2ec16ccSHideki Saito     }
815f2ec16ccSHideki Saito   }
816f2ec16ccSHideki Saito 
817f2ec16ccSHideki Saito   // Now we know the widest induction type, check if our found induction
818f2ec16ccSHideki Saito   // is the same size. If it's not, unset it here and InnerLoopVectorizer
819f2ec16ccSHideki Saito   // will create another.
820f2ec16ccSHideki Saito   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
821f2ec16ccSHideki Saito     PrimaryInduction = nullptr;
822f2ec16ccSHideki Saito 
823f2ec16ccSHideki Saito   return true;
824f2ec16ccSHideki Saito }
825f2ec16ccSHideki Saito 
826f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeMemory() {
827f2ec16ccSHideki Saito   LAI = &(*GetLAA)(*TheLoop);
828f2ec16ccSHideki Saito   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
829f2ec16ccSHideki Saito   if (LAR) {
830f2ec16ccSHideki Saito     ORE->emit([&]() {
831f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
832f2ec16ccSHideki Saito                                         "loop not vectorized: ", *LAR);
833f2ec16ccSHideki Saito     });
834f2ec16ccSHideki Saito   }
835f2ec16ccSHideki Saito   if (!LAI->canVectorizeMemory())
836f2ec16ccSHideki Saito     return false;
837f2ec16ccSHideki Saito 
8385e9215f0SAnna Thomas   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
8399e97caf5SRenato Golin     reportVectorizationFailure("Stores to a uniform address",
8409e97caf5SRenato Golin         "write to a loop invariant address could not be vectorized",
841ec818d7fSHideki Saito         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
842f2ec16ccSHideki Saito     return false;
843f2ec16ccSHideki Saito   }
844f2ec16ccSHideki Saito   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
845f2ec16ccSHideki Saito   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
846f2ec16ccSHideki Saito 
847f2ec16ccSHideki Saito   return true;
848f2ec16ccSHideki Saito }
849f2ec16ccSHideki Saito 
850f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
851f2ec16ccSHideki Saito   Value *In0 = const_cast<Value *>(V);
852f2ec16ccSHideki Saito   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
853f2ec16ccSHideki Saito   if (!PN)
854f2ec16ccSHideki Saito     return false;
855f2ec16ccSHideki Saito 
856f2ec16ccSHideki Saito   return Inductions.count(PN);
857f2ec16ccSHideki Saito }
858f2ec16ccSHideki Saito 
859f2ec16ccSHideki Saito bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
860f2ec16ccSHideki Saito   auto *Inst = dyn_cast<Instruction>(V);
861f2ec16ccSHideki Saito   return (Inst && InductionCastsToIgnore.count(Inst));
862f2ec16ccSHideki Saito }
863f2ec16ccSHideki Saito 
864f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
865f2ec16ccSHideki Saito   return isInductionPhi(V) || isCastedInductionVariable(V);
866f2ec16ccSHideki Saito }
867f2ec16ccSHideki Saito 
868f2ec16ccSHideki Saito bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
869f2ec16ccSHideki Saito   return FirstOrderRecurrences.count(Phi);
870f2ec16ccSHideki Saito }
871f2ec16ccSHideki Saito 
872f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
873f2ec16ccSHideki Saito   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
874f2ec16ccSHideki Saito }
875f2ec16ccSHideki Saito 
876f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockCanBePredicated(
877d57d73daSDorit Nuzman     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
878f2ec16ccSHideki Saito   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
879f2ec16ccSHideki Saito 
880f2ec16ccSHideki Saito   for (Instruction &I : *BB) {
881f2ec16ccSHideki Saito     // Check that we don't have a constant expression that can trap as operand.
882f2ec16ccSHideki Saito     for (Value *Operand : I.operands()) {
883f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(Operand))
884f2ec16ccSHideki Saito         if (C->canTrap())
885f2ec16ccSHideki Saito           return false;
886f2ec16ccSHideki Saito     }
887f2ec16ccSHideki Saito     // We might be able to hoist the load.
888f2ec16ccSHideki Saito     if (I.mayReadFromMemory()) {
889f2ec16ccSHideki Saito       auto *LI = dyn_cast<LoadInst>(&I);
890f2ec16ccSHideki Saito       if (!LI)
891f2ec16ccSHideki Saito         return false;
892f2ec16ccSHideki Saito       if (!SafePtrs.count(LI->getPointerOperand())) {
893f2ec16ccSHideki Saito         // !llvm.mem.parallel_loop_access implies if-conversion safety.
894f2ec16ccSHideki Saito         // Otherwise, record that the load needs (real or emulated) masking
895f2ec16ccSHideki Saito         // and let the cost model decide.
896d57d73daSDorit Nuzman         if (!IsAnnotatedParallel || PreserveGuards)
897f2ec16ccSHideki Saito           MaskedOp.insert(LI);
898f2ec16ccSHideki Saito         continue;
899f2ec16ccSHideki Saito       }
900f2ec16ccSHideki Saito     }
901f2ec16ccSHideki Saito 
902f2ec16ccSHideki Saito     if (I.mayWriteToMemory()) {
903f2ec16ccSHideki Saito       auto *SI = dyn_cast<StoreInst>(&I);
904f2ec16ccSHideki Saito       if (!SI)
905f2ec16ccSHideki Saito         return false;
906f2ec16ccSHideki Saito       // Predicated store requires some form of masking:
907f2ec16ccSHideki Saito       // 1) masked store HW instruction,
908f2ec16ccSHideki Saito       // 2) emulation via load-blend-store (only if safe and legal to do so,
909f2ec16ccSHideki Saito       //    be aware on the race conditions), or
910f2ec16ccSHideki Saito       // 3) element-by-element predicate check and scalar store.
911f2ec16ccSHideki Saito       MaskedOp.insert(SI);
912f2ec16ccSHideki Saito       continue;
913f2ec16ccSHideki Saito     }
914f2ec16ccSHideki Saito     if (I.mayThrow())
915f2ec16ccSHideki Saito       return false;
916f2ec16ccSHideki Saito   }
917f2ec16ccSHideki Saito 
918f2ec16ccSHideki Saito   return true;
919f2ec16ccSHideki Saito }
920f2ec16ccSHideki Saito 
921f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
922f2ec16ccSHideki Saito   if (!EnableIfConversion) {
9239e97caf5SRenato Golin     reportVectorizationFailure("If-conversion is disabled",
9249e97caf5SRenato Golin                                "if-conversion is disabled",
925ec818d7fSHideki Saito                                "IfConversionDisabled",
926ec818d7fSHideki Saito                                ORE, TheLoop);
927f2ec16ccSHideki Saito     return false;
928f2ec16ccSHideki Saito   }
929f2ec16ccSHideki Saito 
930f2ec16ccSHideki Saito   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
931f2ec16ccSHideki Saito 
932cf3b5559SPhilip Reames   // A list of pointers which are known to be dereferenceable within scope of
933cf3b5559SPhilip Reames   // the loop body for each iteration of the loop which executes.  That is,
934cf3b5559SPhilip Reames   // the memory pointed to can be dereferenced (with the access size implied by
935cf3b5559SPhilip Reames   // the value's type) unconditionally within the loop header without
936cf3b5559SPhilip Reames   // introducing a new fault.
937f2ec16ccSHideki Saito   SmallPtrSet<Value *, 8> SafePointes;
938f2ec16ccSHideki Saito 
939f2ec16ccSHideki Saito   // Collect safe addresses.
940f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
9417403569bSPhilip Reames     if (!blockNeedsPredication(BB)) {
942f2ec16ccSHideki Saito       for (Instruction &I : *BB)
943f2ec16ccSHideki Saito         if (auto *Ptr = getLoadStorePointerOperand(&I))
944f2ec16ccSHideki Saito           SafePointes.insert(Ptr);
9457403569bSPhilip Reames       continue;
9467403569bSPhilip Reames     }
9477403569bSPhilip Reames 
9487403569bSPhilip Reames     // For a block which requires predication, a address may be safe to access
9497403569bSPhilip Reames     // in the loop w/o predication if we can prove dereferenceability facts
9507403569bSPhilip Reames     // sufficient to ensure it'll never fault within the loop. For the moment,
9517403569bSPhilip Reames     // we restrict this to loads; stores are more complicated due to
9527403569bSPhilip Reames     // concurrency restrictions.
9537403569bSPhilip Reames     ScalarEvolution &SE = *PSE.getSE();
9547403569bSPhilip Reames     for (Instruction &I : *BB) {
9557403569bSPhilip Reames       LoadInst *LI = dyn_cast<LoadInst>(&I);
9567403569bSPhilip Reames       if (LI && !mustSuppressSpeculation(*LI) &&
9577403569bSPhilip Reames           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
9587403569bSPhilip Reames         SafePointes.insert(LI->getPointerOperand());
9597403569bSPhilip Reames     }
960f2ec16ccSHideki Saito   }
961f2ec16ccSHideki Saito 
962f2ec16ccSHideki Saito   // Collect the blocks that need predication.
963f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
964f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
965f2ec16ccSHideki Saito     // We don't support switch statements inside loops.
966f2ec16ccSHideki Saito     if (!isa<BranchInst>(BB->getTerminator())) {
9679e97caf5SRenato Golin       reportVectorizationFailure("Loop contains a switch statement",
9689e97caf5SRenato Golin                                  "loop contains a switch statement",
969ec818d7fSHideki Saito                                  "LoopContainsSwitch", ORE, TheLoop,
970ec818d7fSHideki Saito                                  BB->getTerminator());
971f2ec16ccSHideki Saito       return false;
972f2ec16ccSHideki Saito     }
973f2ec16ccSHideki Saito 
974f2ec16ccSHideki Saito     // We must be able to predicate all blocks that need to be predicated.
975f2ec16ccSHideki Saito     if (blockNeedsPredication(BB)) {
976f2ec16ccSHideki Saito       if (!blockCanBePredicated(BB, SafePointes)) {
9779e97caf5SRenato Golin         reportVectorizationFailure(
9789e97caf5SRenato Golin             "Control flow cannot be substituted for a select",
9799e97caf5SRenato Golin             "control flow cannot be substituted for a select",
980ec818d7fSHideki Saito             "NoCFGForSelect", ORE, TheLoop,
981ec818d7fSHideki Saito             BB->getTerminator());
982f2ec16ccSHideki Saito         return false;
983f2ec16ccSHideki Saito       }
984f2ec16ccSHideki Saito     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
9859e97caf5SRenato Golin       reportVectorizationFailure(
9869e97caf5SRenato Golin           "Control flow cannot be substituted for a select",
9879e97caf5SRenato Golin           "control flow cannot be substituted for a select",
988ec818d7fSHideki Saito           "NoCFGForSelect", ORE, TheLoop,
989ec818d7fSHideki Saito           BB->getTerminator());
990f2ec16ccSHideki Saito       return false;
991f2ec16ccSHideki Saito     }
992f2ec16ccSHideki Saito   }
993f2ec16ccSHideki Saito 
994f2ec16ccSHideki Saito   // We can if-convert this loop.
995f2ec16ccSHideki Saito   return true;
996f2ec16ccSHideki Saito }
997f2ec16ccSHideki Saito 
998f2ec16ccSHideki Saito // Helper function to canVectorizeLoopNestCFG.
999f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1000f2ec16ccSHideki Saito                                                     bool UseVPlanNativePath) {
1001f2ec16ccSHideki Saito   assert((UseVPlanNativePath || Lp->empty()) &&
1002f2ec16ccSHideki Saito          "VPlan-native path is not enabled.");
1003f2ec16ccSHideki Saito 
1004f2ec16ccSHideki Saito   // TODO: ORE should be improved to show more accurate information when an
1005f2ec16ccSHideki Saito   // outer loop can't be vectorized because a nested loop is not understood or
1006f2ec16ccSHideki Saito   // legal. Something like: "outer_loop_location: loop not vectorized:
1007f2ec16ccSHideki Saito   // (inner_loop_location) loop control flow is not understood by vectorizer".
1008f2ec16ccSHideki Saito 
1009f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1010f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1011f2ec16ccSHideki Saito   bool Result = true;
1012f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1013f2ec16ccSHideki Saito 
1014f2ec16ccSHideki Saito   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1015f2ec16ccSHideki Saito   // be canonicalized.
1016f2ec16ccSHideki Saito   if (!Lp->getLoopPreheader()) {
10179e97caf5SRenato Golin     reportVectorizationFailure("Loop doesn't have a legal pre-header",
10189e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1019ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1020f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1021f2ec16ccSHideki Saito       Result = false;
1022f2ec16ccSHideki Saito     else
1023f2ec16ccSHideki Saito       return false;
1024f2ec16ccSHideki Saito   }
1025f2ec16ccSHideki Saito 
1026f2ec16ccSHideki Saito   // We must have a single backedge.
1027f2ec16ccSHideki Saito   if (Lp->getNumBackEdges() != 1) {
10289e97caf5SRenato Golin     reportVectorizationFailure("The loop must have a single backedge",
10299e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1030ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1031f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1032f2ec16ccSHideki Saito       Result = false;
1033f2ec16ccSHideki Saito     else
1034f2ec16ccSHideki Saito       return false;
1035f2ec16ccSHideki Saito   }
1036f2ec16ccSHideki Saito 
1037f2ec16ccSHideki Saito   // We must have a single exiting block.
1038f2ec16ccSHideki Saito   if (!Lp->getExitingBlock()) {
10399e97caf5SRenato Golin     reportVectorizationFailure("The loop must have an exiting block",
10409e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1041ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1042f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1043f2ec16ccSHideki Saito       Result = false;
1044f2ec16ccSHideki Saito     else
1045f2ec16ccSHideki Saito       return false;
1046f2ec16ccSHideki Saito   }
1047f2ec16ccSHideki Saito 
1048f2ec16ccSHideki Saito   // We only handle bottom-tested loops, i.e. loop in which the condition is
1049f2ec16ccSHideki Saito   // checked at the end of each iteration. With that we can assume that all
1050f2ec16ccSHideki Saito   // instructions in the loop are executed the same number of times.
1051f2ec16ccSHideki Saito   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
10529e97caf5SRenato Golin     reportVectorizationFailure("The exiting block is not the loop latch",
10539e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1054ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1055f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1056f2ec16ccSHideki Saito       Result = false;
1057f2ec16ccSHideki Saito     else
1058f2ec16ccSHideki Saito       return false;
1059f2ec16ccSHideki Saito   }
1060f2ec16ccSHideki Saito 
1061f2ec16ccSHideki Saito   return Result;
1062f2ec16ccSHideki Saito }
1063f2ec16ccSHideki Saito 
1064f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1065f2ec16ccSHideki Saito     Loop *Lp, bool UseVPlanNativePath) {
1066f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1067f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1068f2ec16ccSHideki Saito   bool Result = true;
1069f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1070f2ec16ccSHideki Saito   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1071f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1072f2ec16ccSHideki Saito       Result = false;
1073f2ec16ccSHideki Saito     else
1074f2ec16ccSHideki Saito       return false;
1075f2ec16ccSHideki Saito   }
1076f2ec16ccSHideki Saito 
1077f2ec16ccSHideki Saito   // Recursively check whether the loop control flow of nested loops is
1078f2ec16ccSHideki Saito   // understood.
1079f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
1080f2ec16ccSHideki Saito     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1081f2ec16ccSHideki Saito       if (DoExtraAnalysis)
1082f2ec16ccSHideki Saito         Result = false;
1083f2ec16ccSHideki Saito       else
1084f2ec16ccSHideki Saito         return false;
1085f2ec16ccSHideki Saito     }
1086f2ec16ccSHideki Saito 
1087f2ec16ccSHideki Saito   return Result;
1088f2ec16ccSHideki Saito }
1089f2ec16ccSHideki Saito 
1090f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1091f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1092f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1093f2ec16ccSHideki Saito   bool Result = true;
1094f2ec16ccSHideki Saito 
1095f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1096f2ec16ccSHideki Saito   // Check whether the loop-related control flow in the loop nest is expected by
1097f2ec16ccSHideki Saito   // vectorizer.
1098f2ec16ccSHideki Saito   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1099f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1100f2ec16ccSHideki Saito       Result = false;
1101f2ec16ccSHideki Saito     else
1102f2ec16ccSHideki Saito       return false;
1103f2ec16ccSHideki Saito   }
1104f2ec16ccSHideki Saito 
1105f2ec16ccSHideki Saito   // We need to have a loop header.
1106d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1107f2ec16ccSHideki Saito                     << '\n');
1108f2ec16ccSHideki Saito 
1109f2ec16ccSHideki Saito   // Specific checks for outer loops. We skip the remaining legal checks at this
1110f2ec16ccSHideki Saito   // point because they don't support outer loops.
1111f2ec16ccSHideki Saito   if (!TheLoop->empty()) {
1112f2ec16ccSHideki Saito     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1113f2ec16ccSHideki Saito 
1114f2ec16ccSHideki Saito     if (!canVectorizeOuterLoop()) {
11159e97caf5SRenato Golin       reportVectorizationFailure("Unsupported outer loop",
11169e97caf5SRenato Golin                                  "unsupported outer loop",
1117ec818d7fSHideki Saito                                  "UnsupportedOuterLoop",
1118ec818d7fSHideki Saito                                  ORE, TheLoop);
1119f2ec16ccSHideki Saito       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1120f2ec16ccSHideki Saito       // outer loops.
1121f2ec16ccSHideki Saito       return false;
1122f2ec16ccSHideki Saito     }
1123f2ec16ccSHideki Saito 
1124d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1125f2ec16ccSHideki Saito     return Result;
1126f2ec16ccSHideki Saito   }
1127f2ec16ccSHideki Saito 
1128f2ec16ccSHideki Saito   assert(TheLoop->empty() && "Inner loop expected.");
1129f2ec16ccSHideki Saito   // Check if we can if-convert non-single-bb loops.
1130f2ec16ccSHideki Saito   unsigned NumBlocks = TheLoop->getNumBlocks();
1131f2ec16ccSHideki Saito   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1132d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1133f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1134f2ec16ccSHideki Saito       Result = false;
1135f2ec16ccSHideki Saito     else
1136f2ec16ccSHideki Saito       return false;
1137f2ec16ccSHideki Saito   }
1138f2ec16ccSHideki Saito 
1139f2ec16ccSHideki Saito   // Check if we can vectorize the instructions and CFG in this loop.
1140f2ec16ccSHideki Saito   if (!canVectorizeInstrs()) {
1141d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1142f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1143f2ec16ccSHideki Saito       Result = false;
1144f2ec16ccSHideki Saito     else
1145f2ec16ccSHideki Saito       return false;
1146f2ec16ccSHideki Saito   }
1147f2ec16ccSHideki Saito 
1148f2ec16ccSHideki Saito   // Go over each instruction and look at memory deps.
1149f2ec16ccSHideki Saito   if (!canVectorizeMemory()) {
1150d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1151f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1152f2ec16ccSHideki Saito       Result = false;
1153f2ec16ccSHideki Saito     else
1154f2ec16ccSHideki Saito       return false;
1155f2ec16ccSHideki Saito   }
1156f2ec16ccSHideki Saito 
1157d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1158f2ec16ccSHideki Saito                     << (LAI->getRuntimePointerChecking()->Need
1159f2ec16ccSHideki Saito                             ? " (with a runtime bound check)"
1160f2ec16ccSHideki Saito                             : "")
1161f2ec16ccSHideki Saito                     << "!\n");
1162f2ec16ccSHideki Saito 
1163f2ec16ccSHideki Saito   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1164f2ec16ccSHideki Saito   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1165f2ec16ccSHideki Saito     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1166f2ec16ccSHideki Saito 
1167f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
11689e97caf5SRenato Golin     reportVectorizationFailure("Too many SCEV checks needed",
11699e97caf5SRenato Golin         "Too many SCEV assumptions need to be made and checked at runtime",
1170ec818d7fSHideki Saito         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1171f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1172f2ec16ccSHideki Saito       Result = false;
1173f2ec16ccSHideki Saito     else
1174f2ec16ccSHideki Saito       return false;
1175f2ec16ccSHideki Saito   }
1176f2ec16ccSHideki Saito 
1177f2ec16ccSHideki Saito   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1178f2ec16ccSHideki Saito   // we can vectorize, and at this point we don't have any other mem analysis
1179f2ec16ccSHideki Saito   // which may limit our maximum vectorization factor, so just return true with
1180f2ec16ccSHideki Saito   // no restrictions.
1181f2ec16ccSHideki Saito   return Result;
1182f2ec16ccSHideki Saito }
1183f2ec16ccSHideki Saito 
1184d57d73daSDorit Nuzman bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1185b0b5312eSAyal Zaks 
1186b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1187b0b5312eSAyal Zaks 
1188b0b5312eSAyal Zaks   if (!PrimaryInduction) {
11899e97caf5SRenato Golin     reportVectorizationFailure(
11909e97caf5SRenato Golin         "No primary induction, cannot fold tail by masking",
11919e97caf5SRenato Golin         "Missing a primary induction variable in the loop, which is "
11929e97caf5SRenato Golin         "needed in order to fold tail by masking as required.",
1193ec818d7fSHideki Saito         "NoPrimaryInduction", ORE, TheLoop);
1194b0b5312eSAyal Zaks     return false;
1195b0b5312eSAyal Zaks   }
1196b0b5312eSAyal Zaks 
1197d15df0edSAyal Zaks   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1198b0b5312eSAyal Zaks 
1199d15df0edSAyal Zaks   for (auto &Reduction : *getReductionVars())
1200d15df0edSAyal Zaks     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1201d15df0edSAyal Zaks 
1202d15df0edSAyal Zaks   // TODO: handle non-reduction outside users when tail is folded by masking.
1203b0b5312eSAyal Zaks   for (auto *AE : AllowedExit) {
1204d15df0edSAyal Zaks     // Check that all users of allowed exit values are inside the loop or
1205d15df0edSAyal Zaks     // are the live-out of a reduction.
1206d15df0edSAyal Zaks     if (ReductionLiveOuts.count(AE))
1207d15df0edSAyal Zaks       continue;
1208b0b5312eSAyal Zaks     for (User *U : AE->users()) {
1209b0b5312eSAyal Zaks       Instruction *UI = cast<Instruction>(U);
1210b0b5312eSAyal Zaks       if (TheLoop->contains(UI))
1211b0b5312eSAyal Zaks         continue;
12129e97caf5SRenato Golin       reportVectorizationFailure(
12139e97caf5SRenato Golin           "Cannot fold tail by masking, loop has an outside user for",
12149e97caf5SRenato Golin           "Cannot fold tail by masking in the presence of live outs.",
1215ec818d7fSHideki Saito           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1216b0b5312eSAyal Zaks       return false;
1217b0b5312eSAyal Zaks     }
1218b0b5312eSAyal Zaks   }
1219b0b5312eSAyal Zaks 
1220b0b5312eSAyal Zaks   // The list of pointers that we can safely read and write to remains empty.
1221b0b5312eSAyal Zaks   SmallPtrSet<Value *, 8> SafePointers;
1222b0b5312eSAyal Zaks 
1223b0b5312eSAyal Zaks   // Check and mark all blocks for predication, including those that ordinarily
1224b0b5312eSAyal Zaks   // do not need predication such as the header block.
1225b0b5312eSAyal Zaks   for (BasicBlock *BB : TheLoop->blocks()) {
1226d57d73daSDorit Nuzman     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
12279e97caf5SRenato Golin       reportVectorizationFailure(
12289e97caf5SRenato Golin           "Cannot fold tail by masking as required",
12299e97caf5SRenato Golin           "control flow cannot be substituted for a select",
1230ec818d7fSHideki Saito           "NoCFGForSelect", ORE, TheLoop,
1231ec818d7fSHideki Saito           BB->getTerminator());
1232b0b5312eSAyal Zaks       return false;
1233b0b5312eSAyal Zaks     }
1234b0b5312eSAyal Zaks   }
1235b0b5312eSAyal Zaks 
1236b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1237b0b5312eSAyal Zaks   return true;
1238b0b5312eSAyal Zaks }
1239b0b5312eSAyal Zaks 
1240f2ec16ccSHideki Saito } // namespace llvm
1241