1f2ec16ccSHideki Saito //===- LoopVectorizationLegality.cpp --------------------------------------===//
2f2ec16ccSHideki Saito //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6f2ec16ccSHideki Saito //
7f2ec16ccSHideki Saito //===----------------------------------------------------------------------===//
8f2ec16ccSHideki Saito //
9f2ec16ccSHideki Saito // This file provides loop vectorization legality analysis. Original code
10f2ec16ccSHideki Saito // resided in LoopVectorize.cpp for a long time.
11f2ec16ccSHideki Saito //
12f2ec16ccSHideki Saito // At this point, it is implemented as a utility class, not as an analysis
13f2ec16ccSHideki Saito // pass. It should be easy to create an analysis pass around it if there
14f2ec16ccSHideki Saito // is a need (but D45420 needs to happen first).
15f2ec16ccSHideki Saito //
16ec818d7fSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorize.h"
17f2ec16ccSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
18f2ec16ccSHideki Saito #include "llvm/Analysis/VectorUtils.h"
19f2ec16ccSHideki Saito #include "llvm/IR/IntrinsicInst.h"
20f2ec16ccSHideki Saito 
21f2ec16ccSHideki Saito using namespace llvm;
22f2ec16ccSHideki Saito 
23f2ec16ccSHideki Saito #define LV_NAME "loop-vectorize"
24f2ec16ccSHideki Saito #define DEBUG_TYPE LV_NAME
25f2ec16ccSHideki Saito 
264e4ecae0SHideki Saito extern cl::opt<bool> EnableVPlanPredication;
274e4ecae0SHideki Saito 
28f2ec16ccSHideki Saito static cl::opt<bool>
29f2ec16ccSHideki Saito     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
30f2ec16ccSHideki Saito                        cl::desc("Enable if-conversion during vectorization."));
31f2ec16ccSHideki Saito 
32f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
33f2ec16ccSHideki Saito     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
34f2ec16ccSHideki Saito     cl::desc("The maximum allowed number of runtime memory checks with a "
35f2ec16ccSHideki Saito              "vectorize(enable) pragma."));
36f2ec16ccSHideki Saito 
37f2ec16ccSHideki Saito static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
38f2ec16ccSHideki Saito     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
39f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed."));
40f2ec16ccSHideki Saito 
41f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
42f2ec16ccSHideki Saito     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
43f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed with a "
44f2ec16ccSHideki Saito              "vectorize(enable) pragma"));
45f2ec16ccSHideki Saito 
46f2ec16ccSHideki Saito /// Maximum vectorization interleave count.
47f2ec16ccSHideki Saito static const unsigned MaxInterleaveFactor = 16;
48f2ec16ccSHideki Saito 
49f2ec16ccSHideki Saito namespace llvm {
50f2ec16ccSHideki Saito 
51f2ec16ccSHideki Saito bool LoopVectorizeHints::Hint::validate(unsigned Val) {
52f2ec16ccSHideki Saito   switch (Kind) {
53f2ec16ccSHideki Saito   case HK_WIDTH:
54f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
55f2ec16ccSHideki Saito   case HK_UNROLL:
56f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
57f2ec16ccSHideki Saito   case HK_FORCE:
58f2ec16ccSHideki Saito     return (Val <= 1);
59f2ec16ccSHideki Saito   case HK_ISVECTORIZED:
6020b198ecSSjoerd Meijer   case HK_PREDICATE:
61f2ec16ccSHideki Saito     return (Val == 0 || Val == 1);
62f2ec16ccSHideki Saito   }
63f2ec16ccSHideki Saito   return false;
64f2ec16ccSHideki Saito }
65f2ec16ccSHideki Saito 
66d4eb13c8SMichael Kruse LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
67d4eb13c8SMichael Kruse                                        bool InterleaveOnlyWhenForced,
68f2ec16ccSHideki Saito                                        OptimizationRemarkEmitter &ORE)
69f2ec16ccSHideki Saito     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
70d4eb13c8SMichael Kruse       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
71f2ec16ccSHideki Saito       Force("vectorize.enable", FK_Undefined, HK_FORCE),
7220b198ecSSjoerd Meijer       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
7320b198ecSSjoerd Meijer       Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
7420b198ecSSjoerd Meijer       ORE(ORE) {
75f2ec16ccSHideki Saito   // Populate values with existing loop metadata.
76f2ec16ccSHideki Saito   getHintsFromMetadata();
77f2ec16ccSHideki Saito 
78f2ec16ccSHideki Saito   // force-vector-interleave overrides DisableInterleaving.
79f2ec16ccSHideki Saito   if (VectorizerParams::isInterleaveForced())
80f2ec16ccSHideki Saito     Interleave.Value = VectorizerParams::VectorizationInterleave;
81f2ec16ccSHideki Saito 
82f2ec16ccSHideki Saito   if (IsVectorized.Value != 1)
83f2ec16ccSHideki Saito     // If the vectorization width and interleaving count are both 1 then
84f2ec16ccSHideki Saito     // consider the loop to have been already vectorized because there's
85f2ec16ccSHideki Saito     // nothing more that we can do.
86f2ec16ccSHideki Saito     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
87d4eb13c8SMichael Kruse   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
88f2ec16ccSHideki Saito              << "LV: Interleaving disabled by the pass manager\n");
89f2ec16ccSHideki Saito }
90f2ec16ccSHideki Saito 
9177a614a6SMichael Kruse void LoopVectorizeHints::setAlreadyVectorized() {
9277a614a6SMichael Kruse   LLVMContext &Context = TheLoop->getHeader()->getContext();
9377a614a6SMichael Kruse 
9477a614a6SMichael Kruse   MDNode *IsVectorizedMD = MDNode::get(
9577a614a6SMichael Kruse       Context,
9677a614a6SMichael Kruse       {MDString::get(Context, "llvm.loop.isvectorized"),
9777a614a6SMichael Kruse        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
9877a614a6SMichael Kruse   MDNode *LoopID = TheLoop->getLoopID();
9977a614a6SMichael Kruse   MDNode *NewLoopID =
10077a614a6SMichael Kruse       makePostTransformationMetadata(Context, LoopID,
10177a614a6SMichael Kruse                                      {Twine(Prefix(), "vectorize.").str(),
10277a614a6SMichael Kruse                                       Twine(Prefix(), "interleave.").str()},
10377a614a6SMichael Kruse                                      {IsVectorizedMD});
10477a614a6SMichael Kruse   TheLoop->setLoopID(NewLoopID);
10577a614a6SMichael Kruse 
10677a614a6SMichael Kruse   // Update internal cache.
10777a614a6SMichael Kruse   IsVectorized.Value = 1;
10877a614a6SMichael Kruse }
10977a614a6SMichael Kruse 
110d4eb13c8SMichael Kruse bool LoopVectorizeHints::allowVectorization(
111d4eb13c8SMichael Kruse     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
112f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled) {
113d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
114f2ec16ccSHideki Saito     emitRemarkWithHints();
115f2ec16ccSHideki Saito     return false;
116f2ec16ccSHideki Saito   }
117f2ec16ccSHideki Saito 
118d4eb13c8SMichael Kruse   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
119d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
120f2ec16ccSHideki Saito     emitRemarkWithHints();
121f2ec16ccSHideki Saito     return false;
122f2ec16ccSHideki Saito   }
123f2ec16ccSHideki Saito 
124f2ec16ccSHideki Saito   if (getIsVectorized() == 1) {
125d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
126f2ec16ccSHideki Saito     // FIXME: Add interleave.disable metadata. This will allow
127f2ec16ccSHideki Saito     // vectorize.disable to be used without disabling the pass and errors
128f2ec16ccSHideki Saito     // to differentiate between disabled vectorization and a width of 1.
129f2ec16ccSHideki Saito     ORE.emit([&]() {
130f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
131f2ec16ccSHideki Saito                                         "AllDisabled", L->getStartLoc(),
132f2ec16ccSHideki Saito                                         L->getHeader())
133f2ec16ccSHideki Saito              << "loop not vectorized: vectorization and interleaving are "
134f2ec16ccSHideki Saito                 "explicitly disabled, or the loop has already been "
135f2ec16ccSHideki Saito                 "vectorized";
136f2ec16ccSHideki Saito     });
137f2ec16ccSHideki Saito     return false;
138f2ec16ccSHideki Saito   }
139f2ec16ccSHideki Saito 
140f2ec16ccSHideki Saito   return true;
141f2ec16ccSHideki Saito }
142f2ec16ccSHideki Saito 
143f2ec16ccSHideki Saito void LoopVectorizeHints::emitRemarkWithHints() const {
144f2ec16ccSHideki Saito   using namespace ore;
145f2ec16ccSHideki Saito 
146f2ec16ccSHideki Saito   ORE.emit([&]() {
147f2ec16ccSHideki Saito     if (Force.Value == LoopVectorizeHints::FK_Disabled)
148f2ec16ccSHideki Saito       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
149f2ec16ccSHideki Saito                                       TheLoop->getStartLoc(),
150f2ec16ccSHideki Saito                                       TheLoop->getHeader())
151f2ec16ccSHideki Saito              << "loop not vectorized: vectorization is explicitly disabled";
152f2ec16ccSHideki Saito     else {
153f2ec16ccSHideki Saito       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
154f2ec16ccSHideki Saito                                  TheLoop->getStartLoc(), TheLoop->getHeader());
155f2ec16ccSHideki Saito       R << "loop not vectorized";
156f2ec16ccSHideki Saito       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
157f2ec16ccSHideki Saito         R << " (Force=" << NV("Force", true);
158f2ec16ccSHideki Saito         if (Width.Value != 0)
159f2ec16ccSHideki Saito           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
160f2ec16ccSHideki Saito         if (Interleave.Value != 0)
161f2ec16ccSHideki Saito           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
162f2ec16ccSHideki Saito         R << ")";
163f2ec16ccSHideki Saito       }
164f2ec16ccSHideki Saito       return R;
165f2ec16ccSHideki Saito     }
166f2ec16ccSHideki Saito   });
167f2ec16ccSHideki Saito }
168f2ec16ccSHideki Saito 
169f2ec16ccSHideki Saito const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
170f2ec16ccSHideki Saito   if (getWidth() == 1)
171f2ec16ccSHideki Saito     return LV_NAME;
172f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled)
173f2ec16ccSHideki Saito     return LV_NAME;
174f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
175f2ec16ccSHideki Saito     return LV_NAME;
176f2ec16ccSHideki Saito   return OptimizationRemarkAnalysis::AlwaysPrint;
177f2ec16ccSHideki Saito }
178f2ec16ccSHideki Saito 
179f2ec16ccSHideki Saito void LoopVectorizeHints::getHintsFromMetadata() {
180f2ec16ccSHideki Saito   MDNode *LoopID = TheLoop->getLoopID();
181f2ec16ccSHideki Saito   if (!LoopID)
182f2ec16ccSHideki Saito     return;
183f2ec16ccSHideki Saito 
184f2ec16ccSHideki Saito   // First operand should refer to the loop id itself.
185f2ec16ccSHideki Saito   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
186f2ec16ccSHideki Saito   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
187f2ec16ccSHideki Saito 
188f2ec16ccSHideki Saito   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
189f2ec16ccSHideki Saito     const MDString *S = nullptr;
190f2ec16ccSHideki Saito     SmallVector<Metadata *, 4> Args;
191f2ec16ccSHideki Saito 
192f2ec16ccSHideki Saito     // The expected hint is either a MDString or a MDNode with the first
193f2ec16ccSHideki Saito     // operand a MDString.
194f2ec16ccSHideki Saito     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
195f2ec16ccSHideki Saito       if (!MD || MD->getNumOperands() == 0)
196f2ec16ccSHideki Saito         continue;
197f2ec16ccSHideki Saito       S = dyn_cast<MDString>(MD->getOperand(0));
198f2ec16ccSHideki Saito       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
199f2ec16ccSHideki Saito         Args.push_back(MD->getOperand(i));
200f2ec16ccSHideki Saito     } else {
201f2ec16ccSHideki Saito       S = dyn_cast<MDString>(LoopID->getOperand(i));
202f2ec16ccSHideki Saito       assert(Args.size() == 0 && "too many arguments for MDString");
203f2ec16ccSHideki Saito     }
204f2ec16ccSHideki Saito 
205f2ec16ccSHideki Saito     if (!S)
206f2ec16ccSHideki Saito       continue;
207f2ec16ccSHideki Saito 
208f2ec16ccSHideki Saito     // Check if the hint starts with the loop metadata prefix.
209f2ec16ccSHideki Saito     StringRef Name = S->getString();
210f2ec16ccSHideki Saito     if (Args.size() == 1)
211f2ec16ccSHideki Saito       setHint(Name, Args[0]);
212f2ec16ccSHideki Saito   }
213f2ec16ccSHideki Saito }
214f2ec16ccSHideki Saito 
215f2ec16ccSHideki Saito void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
216f2ec16ccSHideki Saito   if (!Name.startswith(Prefix()))
217f2ec16ccSHideki Saito     return;
218f2ec16ccSHideki Saito   Name = Name.substr(Prefix().size(), StringRef::npos);
219f2ec16ccSHideki Saito 
220f2ec16ccSHideki Saito   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
221f2ec16ccSHideki Saito   if (!C)
222f2ec16ccSHideki Saito     return;
223f2ec16ccSHideki Saito   unsigned Val = C->getZExtValue();
224f2ec16ccSHideki Saito 
22520b198ecSSjoerd Meijer   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
226f2ec16ccSHideki Saito   for (auto H : Hints) {
227f2ec16ccSHideki Saito     if (Name == H->Name) {
228f2ec16ccSHideki Saito       if (H->validate(Val))
229f2ec16ccSHideki Saito         H->Value = Val;
230f2ec16ccSHideki Saito       else
231d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
232f2ec16ccSHideki Saito       break;
233f2ec16ccSHideki Saito     }
234f2ec16ccSHideki Saito   }
235f2ec16ccSHideki Saito }
236f2ec16ccSHideki Saito 
237f2ec16ccSHideki Saito bool LoopVectorizationRequirements::doesNotMeet(
238f2ec16ccSHideki Saito     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
239f2ec16ccSHideki Saito   const char *PassName = Hints.vectorizeAnalysisPassName();
240f2ec16ccSHideki Saito   bool Failed = false;
241f2ec16ccSHideki Saito   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
242f2ec16ccSHideki Saito     ORE.emit([&]() {
243f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisFPCommute(
244f2ec16ccSHideki Saito                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
245f2ec16ccSHideki Saito                  UnsafeAlgebraInst->getParent())
246f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
247f2ec16ccSHideki Saito                 "floating-point operations";
248f2ec16ccSHideki Saito     });
249f2ec16ccSHideki Saito     Failed = true;
250f2ec16ccSHideki Saito   }
251f2ec16ccSHideki Saito 
252f2ec16ccSHideki Saito   // Test if runtime memcheck thresholds are exceeded.
253f2ec16ccSHideki Saito   bool PragmaThresholdReached =
254f2ec16ccSHideki Saito       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
255f2ec16ccSHideki Saito   bool ThresholdReached =
256f2ec16ccSHideki Saito       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
257f2ec16ccSHideki Saito   if ((ThresholdReached && !Hints.allowReordering()) ||
258f2ec16ccSHideki Saito       PragmaThresholdReached) {
259f2ec16ccSHideki Saito     ORE.emit([&]() {
260f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
261f2ec16ccSHideki Saito                                                 L->getStartLoc(),
262f2ec16ccSHideki Saito                                                 L->getHeader())
263f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
264f2ec16ccSHideki Saito                 "memory operations";
265f2ec16ccSHideki Saito     });
266d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
267f2ec16ccSHideki Saito     Failed = true;
268f2ec16ccSHideki Saito   }
269f2ec16ccSHideki Saito 
270f2ec16ccSHideki Saito   return Failed;
271f2ec16ccSHideki Saito }
272f2ec16ccSHideki Saito 
273f2ec16ccSHideki Saito // Return true if the inner loop \p Lp is uniform with regard to the outer loop
274f2ec16ccSHideki Saito // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
275f2ec16ccSHideki Saito // executing the inner loop will execute the same iterations). This check is
276f2ec16ccSHideki Saito // very constrained for now but it will be relaxed in the future. \p Lp is
277f2ec16ccSHideki Saito // considered uniform if it meets all the following conditions:
278f2ec16ccSHideki Saito //   1) it has a canonical IV (starting from 0 and with stride 1),
279f2ec16ccSHideki Saito //   2) its latch terminator is a conditional branch and,
280f2ec16ccSHideki Saito //   3) its latch condition is a compare instruction whose operands are the
281f2ec16ccSHideki Saito //      canonical IV and an OuterLp invariant.
282f2ec16ccSHideki Saito // This check doesn't take into account the uniformity of other conditions not
283f2ec16ccSHideki Saito // related to the loop latch because they don't affect the loop uniformity.
284f2ec16ccSHideki Saito //
285f2ec16ccSHideki Saito // NOTE: We decided to keep all these checks and its associated documentation
286f2ec16ccSHideki Saito // together so that we can easily have a picture of the current supported loop
287f2ec16ccSHideki Saito // nests. However, some of the current checks don't depend on \p OuterLp and
288f2ec16ccSHideki Saito // would be redundantly executed for each \p Lp if we invoked this function for
289f2ec16ccSHideki Saito // different candidate outer loops. This is not the case for now because we
290f2ec16ccSHideki Saito // don't currently have the infrastructure to evaluate multiple candidate outer
291f2ec16ccSHideki Saito // loops and \p OuterLp will be a fixed parameter while we only support explicit
292f2ec16ccSHideki Saito // outer loop vectorization. It's also very likely that these checks go away
293f2ec16ccSHideki Saito // before introducing the aforementioned infrastructure. However, if this is not
294f2ec16ccSHideki Saito // the case, we should move the \p OuterLp independent checks to a separate
295f2ec16ccSHideki Saito // function that is only executed once for each \p Lp.
296f2ec16ccSHideki Saito static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
297f2ec16ccSHideki Saito   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
298f2ec16ccSHideki Saito 
299f2ec16ccSHideki Saito   // If Lp is the outer loop, it's uniform by definition.
300f2ec16ccSHideki Saito   if (Lp == OuterLp)
301f2ec16ccSHideki Saito     return true;
302f2ec16ccSHideki Saito   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
303f2ec16ccSHideki Saito 
304f2ec16ccSHideki Saito   // 1.
305f2ec16ccSHideki Saito   PHINode *IV = Lp->getCanonicalInductionVariable();
306f2ec16ccSHideki Saito   if (!IV) {
307d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
308f2ec16ccSHideki Saito     return false;
309f2ec16ccSHideki Saito   }
310f2ec16ccSHideki Saito 
311f2ec16ccSHideki Saito   // 2.
312f2ec16ccSHideki Saito   BasicBlock *Latch = Lp->getLoopLatch();
313f2ec16ccSHideki Saito   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
314f2ec16ccSHideki Saito   if (!LatchBr || LatchBr->isUnconditional()) {
315d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
316f2ec16ccSHideki Saito     return false;
317f2ec16ccSHideki Saito   }
318f2ec16ccSHideki Saito 
319f2ec16ccSHideki Saito   // 3.
320f2ec16ccSHideki Saito   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
321f2ec16ccSHideki Saito   if (!LatchCmp) {
322d34e60caSNicola Zaghen     LLVM_DEBUG(
323d34e60caSNicola Zaghen         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
324f2ec16ccSHideki Saito     return false;
325f2ec16ccSHideki Saito   }
326f2ec16ccSHideki Saito 
327f2ec16ccSHideki Saito   Value *CondOp0 = LatchCmp->getOperand(0);
328f2ec16ccSHideki Saito   Value *CondOp1 = LatchCmp->getOperand(1);
329f2ec16ccSHideki Saito   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
330f2ec16ccSHideki Saito   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
331f2ec16ccSHideki Saito       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
332d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
333f2ec16ccSHideki Saito     return false;
334f2ec16ccSHideki Saito   }
335f2ec16ccSHideki Saito 
336f2ec16ccSHideki Saito   return true;
337f2ec16ccSHideki Saito }
338f2ec16ccSHideki Saito 
339f2ec16ccSHideki Saito // Return true if \p Lp and all its nested loops are uniform with regard to \p
340f2ec16ccSHideki Saito // OuterLp.
341f2ec16ccSHideki Saito static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
342f2ec16ccSHideki Saito   if (!isUniformLoop(Lp, OuterLp))
343f2ec16ccSHideki Saito     return false;
344f2ec16ccSHideki Saito 
345f2ec16ccSHideki Saito   // Check if nested loops are uniform.
346f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
347f2ec16ccSHideki Saito     if (!isUniformLoopNest(SubLp, OuterLp))
348f2ec16ccSHideki Saito       return false;
349f2ec16ccSHideki Saito 
350f2ec16ccSHideki Saito   return true;
351f2ec16ccSHideki Saito }
352f2ec16ccSHideki Saito 
3535f8f34e4SAdrian Prantl /// Check whether it is safe to if-convert this phi node.
354f2ec16ccSHideki Saito ///
355f2ec16ccSHideki Saito /// Phi nodes with constant expressions that can trap are not safe to if
356f2ec16ccSHideki Saito /// convert.
357f2ec16ccSHideki Saito static bool canIfConvertPHINodes(BasicBlock *BB) {
358f2ec16ccSHideki Saito   for (PHINode &Phi : BB->phis()) {
359f2ec16ccSHideki Saito     for (Value *V : Phi.incoming_values())
360f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(V))
361f2ec16ccSHideki Saito         if (C->canTrap())
362f2ec16ccSHideki Saito           return false;
363f2ec16ccSHideki Saito   }
364f2ec16ccSHideki Saito   return true;
365f2ec16ccSHideki Saito }
366f2ec16ccSHideki Saito 
367f2ec16ccSHideki Saito static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
368f2ec16ccSHideki Saito   if (Ty->isPointerTy())
369f2ec16ccSHideki Saito     return DL.getIntPtrType(Ty);
370f2ec16ccSHideki Saito 
371f2ec16ccSHideki Saito   // It is possible that char's or short's overflow when we ask for the loop's
372f2ec16ccSHideki Saito   // trip count, work around this by changing the type size.
373f2ec16ccSHideki Saito   if (Ty->getScalarSizeInBits() < 32)
374f2ec16ccSHideki Saito     return Type::getInt32Ty(Ty->getContext());
375f2ec16ccSHideki Saito 
376f2ec16ccSHideki Saito   return Ty;
377f2ec16ccSHideki Saito }
378f2ec16ccSHideki Saito 
379f2ec16ccSHideki Saito static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
380f2ec16ccSHideki Saito   Ty0 = convertPointerToIntegerType(DL, Ty0);
381f2ec16ccSHideki Saito   Ty1 = convertPointerToIntegerType(DL, Ty1);
382f2ec16ccSHideki Saito   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
383f2ec16ccSHideki Saito     return Ty0;
384f2ec16ccSHideki Saito   return Ty1;
385f2ec16ccSHideki Saito }
386f2ec16ccSHideki Saito 
3875f8f34e4SAdrian Prantl /// Check that the instruction has outside loop users and is not an
388f2ec16ccSHideki Saito /// identified reduction variable.
389f2ec16ccSHideki Saito static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
390f2ec16ccSHideki Saito                                SmallPtrSetImpl<Value *> &AllowedExit) {
39160a1e4ddSAnna Thomas   // Reductions, Inductions and non-header phis are allowed to have exit users. All
392f2ec16ccSHideki Saito   // other instructions must not have external users.
393f2ec16ccSHideki Saito   if (!AllowedExit.count(Inst))
394f2ec16ccSHideki Saito     // Check that all of the users of the loop are inside the BB.
395f2ec16ccSHideki Saito     for (User *U : Inst->users()) {
396f2ec16ccSHideki Saito       Instruction *UI = cast<Instruction>(U);
397f2ec16ccSHideki Saito       // This user may be a reduction exit value.
398f2ec16ccSHideki Saito       if (!TheLoop->contains(UI)) {
399d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
400f2ec16ccSHideki Saito         return true;
401f2ec16ccSHideki Saito       }
402f2ec16ccSHideki Saito     }
403f2ec16ccSHideki Saito   return false;
404f2ec16ccSHideki Saito }
405f2ec16ccSHideki Saito 
406f2ec16ccSHideki Saito int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
407f2ec16ccSHideki Saito   const ValueToValueMap &Strides =
408f2ec16ccSHideki Saito       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
409f2ec16ccSHideki Saito 
410f2ec16ccSHideki Saito   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
411f2ec16ccSHideki Saito   if (Stride == 1 || Stride == -1)
412f2ec16ccSHideki Saito     return Stride;
413f2ec16ccSHideki Saito   return 0;
414f2ec16ccSHideki Saito }
415f2ec16ccSHideki Saito 
416f2ec16ccSHideki Saito bool LoopVectorizationLegality::isUniform(Value *V) {
417f2ec16ccSHideki Saito   return LAI->isUniform(V);
418f2ec16ccSHideki Saito }
419f2ec16ccSHideki Saito 
420f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeOuterLoop() {
421f2ec16ccSHideki Saito   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
422f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
423f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
424f2ec16ccSHideki Saito   bool Result = true;
425f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
426f2ec16ccSHideki Saito 
427f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
428f2ec16ccSHideki Saito     // Check whether the BB terminator is a BranchInst. Any other terminator is
429f2ec16ccSHideki Saito     // not supported yet.
430f2ec16ccSHideki Saito     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
431f2ec16ccSHideki Saito     if (!Br) {
4329e97caf5SRenato Golin       reportVectorizationFailure("Unsupported basic block terminator",
4339e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
434ec818d7fSHideki Saito           "CFGNotUnderstood", ORE, TheLoop);
435f2ec16ccSHideki Saito       if (DoExtraAnalysis)
436f2ec16ccSHideki Saito         Result = false;
437f2ec16ccSHideki Saito       else
438f2ec16ccSHideki Saito         return false;
439f2ec16ccSHideki Saito     }
440f2ec16ccSHideki Saito 
441f2ec16ccSHideki Saito     // Check whether the BranchInst is a supported one. Only unconditional
442f2ec16ccSHideki Saito     // branches, conditional branches with an outer loop invariant condition or
443f2ec16ccSHideki Saito     // backedges are supported.
4444e4ecae0SHideki Saito     // FIXME: We skip these checks when VPlan predication is enabled as we
4454e4ecae0SHideki Saito     // want to allow divergent branches. This whole check will be removed
4464e4ecae0SHideki Saito     // once VPlan predication is on by default.
4474e4ecae0SHideki Saito     if (!EnableVPlanPredication && Br && Br->isConditional() &&
448f2ec16ccSHideki Saito         !TheLoop->isLoopInvariant(Br->getCondition()) &&
449f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(0)) &&
450f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(1))) {
4519e97caf5SRenato Golin       reportVectorizationFailure("Unsupported conditional branch",
4529e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
453ec818d7fSHideki Saito           "CFGNotUnderstood", ORE, TheLoop);
454f2ec16ccSHideki Saito       if (DoExtraAnalysis)
455f2ec16ccSHideki Saito         Result = false;
456f2ec16ccSHideki Saito       else
457f2ec16ccSHideki Saito         return false;
458f2ec16ccSHideki Saito     }
459f2ec16ccSHideki Saito   }
460f2ec16ccSHideki Saito 
461f2ec16ccSHideki Saito   // Check whether inner loops are uniform. At this point, we only support
462f2ec16ccSHideki Saito   // simple outer loops scenarios with uniform nested loops.
463f2ec16ccSHideki Saito   if (!isUniformLoopNest(TheLoop /*loop nest*/,
464f2ec16ccSHideki Saito                          TheLoop /*context outer loop*/)) {
4659e97caf5SRenato Golin     reportVectorizationFailure("Outer loop contains divergent loops",
4669e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
467ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
468f2ec16ccSHideki Saito     if (DoExtraAnalysis)
469f2ec16ccSHideki Saito       Result = false;
470f2ec16ccSHideki Saito     else
471f2ec16ccSHideki Saito       return false;
472f2ec16ccSHideki Saito   }
473f2ec16ccSHideki Saito 
474ea7f3035SHideki Saito   // Check whether we are able to set up outer loop induction.
475ea7f3035SHideki Saito   if (!setupOuterLoopInductions()) {
4769e97caf5SRenato Golin     reportVectorizationFailure("Unsupported outer loop Phi(s)",
4779e97caf5SRenato Golin                                "Unsupported outer loop Phi(s)",
478ec818d7fSHideki Saito                                "UnsupportedPhi", ORE, TheLoop);
479ea7f3035SHideki Saito     if (DoExtraAnalysis)
480ea7f3035SHideki Saito       Result = false;
481ea7f3035SHideki Saito     else
482ea7f3035SHideki Saito       return false;
483ea7f3035SHideki Saito   }
484ea7f3035SHideki Saito 
485f2ec16ccSHideki Saito   return Result;
486f2ec16ccSHideki Saito }
487f2ec16ccSHideki Saito 
488f2ec16ccSHideki Saito void LoopVectorizationLegality::addInductionPhi(
489f2ec16ccSHideki Saito     PHINode *Phi, const InductionDescriptor &ID,
490f2ec16ccSHideki Saito     SmallPtrSetImpl<Value *> &AllowedExit) {
491f2ec16ccSHideki Saito   Inductions[Phi] = ID;
492f2ec16ccSHideki Saito 
493f2ec16ccSHideki Saito   // In case this induction also comes with casts that we know we can ignore
494f2ec16ccSHideki Saito   // in the vectorized loop body, record them here. All casts could be recorded
495f2ec16ccSHideki Saito   // here for ignoring, but suffices to record only the first (as it is the
496f2ec16ccSHideki Saito   // only one that may bw used outside the cast sequence).
497f2ec16ccSHideki Saito   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
498f2ec16ccSHideki Saito   if (!Casts.empty())
499f2ec16ccSHideki Saito     InductionCastsToIgnore.insert(*Casts.begin());
500f2ec16ccSHideki Saito 
501f2ec16ccSHideki Saito   Type *PhiTy = Phi->getType();
502f2ec16ccSHideki Saito   const DataLayout &DL = Phi->getModule()->getDataLayout();
503f2ec16ccSHideki Saito 
504f2ec16ccSHideki Saito   // Get the widest type.
505f2ec16ccSHideki Saito   if (!PhiTy->isFloatingPointTy()) {
506f2ec16ccSHideki Saito     if (!WidestIndTy)
507f2ec16ccSHideki Saito       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
508f2ec16ccSHideki Saito     else
509f2ec16ccSHideki Saito       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
510f2ec16ccSHideki Saito   }
511f2ec16ccSHideki Saito 
512f2ec16ccSHideki Saito   // Int inductions are special because we only allow one IV.
513f2ec16ccSHideki Saito   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
514f2ec16ccSHideki Saito       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
515f2ec16ccSHideki Saito       isa<Constant>(ID.getStartValue()) &&
516f2ec16ccSHideki Saito       cast<Constant>(ID.getStartValue())->isNullValue()) {
517f2ec16ccSHideki Saito 
518f2ec16ccSHideki Saito     // Use the phi node with the widest type as induction. Use the last
519f2ec16ccSHideki Saito     // one if there are multiple (no good reason for doing this other
520f2ec16ccSHideki Saito     // than it is expedient). We've checked that it begins at zero and
521f2ec16ccSHideki Saito     // steps by one, so this is a canonical induction variable.
522f2ec16ccSHideki Saito     if (!PrimaryInduction || PhiTy == WidestIndTy)
523f2ec16ccSHideki Saito       PrimaryInduction = Phi;
524f2ec16ccSHideki Saito   }
525f2ec16ccSHideki Saito 
526f2ec16ccSHideki Saito   // Both the PHI node itself, and the "post-increment" value feeding
527f2ec16ccSHideki Saito   // back into the PHI node may have external users.
528f2ec16ccSHideki Saito   // We can allow those uses, except if the SCEVs we have for them rely
529f2ec16ccSHideki Saito   // on predicates that only hold within the loop, since allowing the exit
5306a1dd77fSAnna Thomas   // currently means re-using this SCEV outside the loop (see PR33706 for more
5316a1dd77fSAnna Thomas   // details).
532f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().isAlwaysTrue()) {
533f2ec16ccSHideki Saito     AllowedExit.insert(Phi);
534f2ec16ccSHideki Saito     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
535f2ec16ccSHideki Saito   }
536f2ec16ccSHideki Saito 
537d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
538f2ec16ccSHideki Saito }
539f2ec16ccSHideki Saito 
540ea7f3035SHideki Saito bool LoopVectorizationLegality::setupOuterLoopInductions() {
541ea7f3035SHideki Saito   BasicBlock *Header = TheLoop->getHeader();
542ea7f3035SHideki Saito 
543ea7f3035SHideki Saito   // Returns true if a given Phi is a supported induction.
544ea7f3035SHideki Saito   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
545ea7f3035SHideki Saito     InductionDescriptor ID;
546ea7f3035SHideki Saito     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
547ea7f3035SHideki Saito         ID.getKind() == InductionDescriptor::IK_IntInduction) {
548ea7f3035SHideki Saito       addInductionPhi(&Phi, ID, AllowedExit);
549ea7f3035SHideki Saito       return true;
550ea7f3035SHideki Saito     } else {
551ea7f3035SHideki Saito       // Bail out for any Phi in the outer loop header that is not a supported
552ea7f3035SHideki Saito       // induction.
553ea7f3035SHideki Saito       LLVM_DEBUG(
554ea7f3035SHideki Saito           dbgs()
555ea7f3035SHideki Saito           << "LV: Found unsupported PHI for outer loop vectorization.\n");
556ea7f3035SHideki Saito       return false;
557ea7f3035SHideki Saito     }
558ea7f3035SHideki Saito   };
559ea7f3035SHideki Saito 
560ea7f3035SHideki Saito   if (llvm::all_of(Header->phis(), isSupportedPhi))
561ea7f3035SHideki Saito     return true;
562ea7f3035SHideki Saito   else
563ea7f3035SHideki Saito     return false;
564ea7f3035SHideki Saito }
565ea7f3035SHideki Saito 
566f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeInstrs() {
567f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
568f2ec16ccSHideki Saito 
569f2ec16ccSHideki Saito   // Look for the attribute signaling the absence of NaNs.
570f2ec16ccSHideki Saito   Function &F = *Header->getParent();
571f2ec16ccSHideki Saito   HasFunNoNaNAttr =
572f2ec16ccSHideki Saito       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
573f2ec16ccSHideki Saito 
574f2ec16ccSHideki Saito   // For each block in the loop.
575f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
576f2ec16ccSHideki Saito     // Scan the instructions in the block and look for hazards.
577f2ec16ccSHideki Saito     for (Instruction &I : *BB) {
578f2ec16ccSHideki Saito       if (auto *Phi = dyn_cast<PHINode>(&I)) {
579f2ec16ccSHideki Saito         Type *PhiTy = Phi->getType();
580f2ec16ccSHideki Saito         // Check that this PHI type is allowed.
581f2ec16ccSHideki Saito         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
582f2ec16ccSHideki Saito             !PhiTy->isPointerTy()) {
5839e97caf5SRenato Golin           reportVectorizationFailure("Found a non-int non-pointer PHI",
5849e97caf5SRenato Golin                                      "loop control flow is not understood by vectorizer",
585ec818d7fSHideki Saito                                      "CFGNotUnderstood", ORE, TheLoop);
586f2ec16ccSHideki Saito           return false;
587f2ec16ccSHideki Saito         }
588f2ec16ccSHideki Saito 
589f2ec16ccSHideki Saito         // If this PHINode is not in the header block, then we know that we
590f2ec16ccSHideki Saito         // can convert it to select during if-conversion. No need to check if
591f2ec16ccSHideki Saito         // the PHIs in this block are induction or reduction variables.
592f2ec16ccSHideki Saito         if (BB != Header) {
59360a1e4ddSAnna Thomas           // Non-header phi nodes that have outside uses can be vectorized. Add
59460a1e4ddSAnna Thomas           // them to the list of allowed exits.
59560a1e4ddSAnna Thomas           // Unsafe cyclic dependencies with header phis are identified during
59660a1e4ddSAnna Thomas           // legalization for reduction, induction and first order
59760a1e4ddSAnna Thomas           // recurrences.
598dd18ce45SBjorn Pettersson           AllowedExit.insert(&I);
599f2ec16ccSHideki Saito           continue;
600f2ec16ccSHideki Saito         }
601f2ec16ccSHideki Saito 
602f2ec16ccSHideki Saito         // We only allow if-converted PHIs with exactly two incoming values.
603f2ec16ccSHideki Saito         if (Phi->getNumIncomingValues() != 2) {
6049e97caf5SRenato Golin           reportVectorizationFailure("Found an invalid PHI",
6059e97caf5SRenato Golin               "loop control flow is not understood by vectorizer",
606ec818d7fSHideki Saito               "CFGNotUnderstood", ORE, TheLoop, Phi);
607f2ec16ccSHideki Saito           return false;
608f2ec16ccSHideki Saito         }
609f2ec16ccSHideki Saito 
610f2ec16ccSHideki Saito         RecurrenceDescriptor RedDes;
611f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
612f2ec16ccSHideki Saito                                                  DT)) {
613f2ec16ccSHideki Saito           if (RedDes.hasUnsafeAlgebra())
614f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
615f2ec16ccSHideki Saito           AllowedExit.insert(RedDes.getLoopExitInstr());
616f2ec16ccSHideki Saito           Reductions[Phi] = RedDes;
617f2ec16ccSHideki Saito           continue;
618f2ec16ccSHideki Saito         }
619f2ec16ccSHideki Saito 
620b02b0ad8SAnna Thomas         // TODO: Instead of recording the AllowedExit, it would be good to record the
621b02b0ad8SAnna Thomas         // complementary set: NotAllowedExit. These include (but may not be
622b02b0ad8SAnna Thomas         // limited to):
623b02b0ad8SAnna Thomas         // 1. Reduction phis as they represent the one-before-last value, which
624b02b0ad8SAnna Thomas         // is not available when vectorized
625b02b0ad8SAnna Thomas         // 2. Induction phis and increment when SCEV predicates cannot be used
626b02b0ad8SAnna Thomas         // outside the loop - see addInductionPhi
627b02b0ad8SAnna Thomas         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
628b02b0ad8SAnna Thomas         // outside the loop - see call to hasOutsideLoopUser in the non-phi
629b02b0ad8SAnna Thomas         // handling below
630b02b0ad8SAnna Thomas         // 4. FirstOrderRecurrence phis that can possibly be handled by
631b02b0ad8SAnna Thomas         // extraction.
632b02b0ad8SAnna Thomas         // By recording these, we can then reason about ways to vectorize each
633b02b0ad8SAnna Thomas         // of these NotAllowedExit.
634f2ec16ccSHideki Saito         InductionDescriptor ID;
635f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
636f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
637f2ec16ccSHideki Saito           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
638f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
639f2ec16ccSHideki Saito           continue;
640f2ec16ccSHideki Saito         }
641f2ec16ccSHideki Saito 
642f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
643f2ec16ccSHideki Saito                                                          SinkAfter, DT)) {
644f2ec16ccSHideki Saito           FirstOrderRecurrences.insert(Phi);
645f2ec16ccSHideki Saito           continue;
646f2ec16ccSHideki Saito         }
647f2ec16ccSHideki Saito 
648f2ec16ccSHideki Saito         // As a last resort, coerce the PHI to a AddRec expression
649f2ec16ccSHideki Saito         // and re-try classifying it a an induction PHI.
650f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
651f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
652f2ec16ccSHideki Saito           continue;
653f2ec16ccSHideki Saito         }
654f2ec16ccSHideki Saito 
6559e97caf5SRenato Golin         reportVectorizationFailure("Found an unidentified PHI",
6569e97caf5SRenato Golin             "value that could not be identified as "
6579e97caf5SRenato Golin             "reduction is used outside the loop",
658ec818d7fSHideki Saito             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
659f2ec16ccSHideki Saito         return false;
660f2ec16ccSHideki Saito       } // end of PHI handling
661f2ec16ccSHideki Saito 
662f2ec16ccSHideki Saito       // We handle calls that:
663f2ec16ccSHideki Saito       //   * Are debug info intrinsics.
664f2ec16ccSHideki Saito       //   * Have a mapping to an IR intrinsic.
665f2ec16ccSHideki Saito       //   * Have a vector version available.
666f2ec16ccSHideki Saito       auto *CI = dyn_cast<CallInst>(&I);
667f2ec16ccSHideki Saito       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
668f2ec16ccSHideki Saito           !isa<DbgInfoIntrinsic>(CI) &&
669f2ec16ccSHideki Saito           !(CI->getCalledFunction() && TLI &&
670f2ec16ccSHideki Saito             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
6717d65fe5cSSanjay Patel         // If the call is a recognized math libary call, it is likely that
6727d65fe5cSSanjay Patel         // we can vectorize it given loosened floating-point constraints.
6737d65fe5cSSanjay Patel         LibFunc Func;
6747d65fe5cSSanjay Patel         bool IsMathLibCall =
6757d65fe5cSSanjay Patel             TLI && CI->getCalledFunction() &&
6767d65fe5cSSanjay Patel             CI->getType()->isFloatingPointTy() &&
6777d65fe5cSSanjay Patel             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
6787d65fe5cSSanjay Patel             TLI->hasOptimizedCodeGen(Func);
6797d65fe5cSSanjay Patel 
6807d65fe5cSSanjay Patel         if (IsMathLibCall) {
6817d65fe5cSSanjay Patel           // TODO: Ideally, we should not use clang-specific language here,
6827d65fe5cSSanjay Patel           // but it's hard to provide meaningful yet generic advice.
6837d65fe5cSSanjay Patel           // Also, should this be guarded by allowExtraAnalysis() and/or be part
6847d65fe5cSSanjay Patel           // of the returned info from isFunctionVectorizable()?
6859e97caf5SRenato Golin           reportVectorizationFailure("Found a non-intrinsic callsite",
6869e97caf5SRenato Golin               "library call cannot be vectorized. "
6877d65fe5cSSanjay Patel               "Try compiling with -fno-math-errno, -ffast-math, "
6889e97caf5SRenato Golin               "or similar flags",
689ec818d7fSHideki Saito               "CantVectorizeLibcall", ORE, TheLoop, CI);
6907d65fe5cSSanjay Patel         } else {
6919e97caf5SRenato Golin           reportVectorizationFailure("Found a non-intrinsic callsite",
6929e97caf5SRenato Golin                                      "call instruction cannot be vectorized",
693ec818d7fSHideki Saito                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
6947d65fe5cSSanjay Patel         }
695f2ec16ccSHideki Saito         return false;
696f2ec16ccSHideki Saito       }
697f2ec16ccSHideki Saito 
698a066f1f9SSimon Pilgrim       // Some intrinsics have scalar arguments and should be same in order for
699a066f1f9SSimon Pilgrim       // them to be vectorized (i.e. loop invariant).
700a066f1f9SSimon Pilgrim       if (CI) {
701f2ec16ccSHideki Saito         auto *SE = PSE.getSE();
702a066f1f9SSimon Pilgrim         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
703a066f1f9SSimon Pilgrim         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
704a066f1f9SSimon Pilgrim           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
705a066f1f9SSimon Pilgrim             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
7069e97caf5SRenato Golin               reportVectorizationFailure("Found unvectorizable intrinsic",
7079e97caf5SRenato Golin                   "intrinsic instruction cannot be vectorized",
708ec818d7fSHideki Saito                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
709f2ec16ccSHideki Saito               return false;
710f2ec16ccSHideki Saito             }
711f2ec16ccSHideki Saito           }
712a066f1f9SSimon Pilgrim       }
713f2ec16ccSHideki Saito 
714f2ec16ccSHideki Saito       // Check that the instruction return type is vectorizable.
715f2ec16ccSHideki Saito       // Also, we can't vectorize extractelement instructions.
716f2ec16ccSHideki Saito       if ((!VectorType::isValidElementType(I.getType()) &&
717f2ec16ccSHideki Saito            !I.getType()->isVoidTy()) ||
718f2ec16ccSHideki Saito           isa<ExtractElementInst>(I)) {
7199e97caf5SRenato Golin         reportVectorizationFailure("Found unvectorizable type",
7209e97caf5SRenato Golin             "instruction return type cannot be vectorized",
721ec818d7fSHideki Saito             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
722f2ec16ccSHideki Saito         return false;
723f2ec16ccSHideki Saito       }
724f2ec16ccSHideki Saito 
725f2ec16ccSHideki Saito       // Check that the stored type is vectorizable.
726f2ec16ccSHideki Saito       if (auto *ST = dyn_cast<StoreInst>(&I)) {
727f2ec16ccSHideki Saito         Type *T = ST->getValueOperand()->getType();
728f2ec16ccSHideki Saito         if (!VectorType::isValidElementType(T)) {
7299e97caf5SRenato Golin           reportVectorizationFailure("Store instruction cannot be vectorized",
7309e97caf5SRenato Golin                                      "store instruction cannot be vectorized",
731ec818d7fSHideki Saito                                      "CantVectorizeStore", ORE, TheLoop, ST);
732f2ec16ccSHideki Saito           return false;
733f2ec16ccSHideki Saito         }
734f2ec16ccSHideki Saito 
7356452bdd2SWarren Ristow         // For nontemporal stores, check that a nontemporal vector version is
7366452bdd2SWarren Ristow         // supported on the target.
7376452bdd2SWarren Ristow         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
7386452bdd2SWarren Ristow           // Arbitrarily try a vector of 2 elements.
7396452bdd2SWarren Ristow           Type *VecTy = VectorType::get(T, /*NumElements=*/2);
7406452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of stored type");
7416452bdd2SWarren Ristow           unsigned Alignment = getLoadStoreAlignment(ST);
742*33671cefSGuillaume Chatelet           assert(Alignment && "Alignment should be set");
743*33671cefSGuillaume Chatelet           if (!TTI->isLegalNTStore(VecTy, llvm::Align(Alignment))) {
7446452bdd2SWarren Ristow             reportVectorizationFailure(
7456452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
7466452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
747ec818d7fSHideki Saito                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
7486452bdd2SWarren Ristow             return false;
7496452bdd2SWarren Ristow           }
7506452bdd2SWarren Ristow         }
7516452bdd2SWarren Ristow 
7526452bdd2SWarren Ristow       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
7536452bdd2SWarren Ristow         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
7546452bdd2SWarren Ristow           // For nontemporal loads, check that a nontemporal vector version is
7556452bdd2SWarren Ristow           // supported on the target (arbitrarily try a vector of 2 elements).
7566452bdd2SWarren Ristow           Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
7576452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of load type");
7586452bdd2SWarren Ristow           unsigned Alignment = getLoadStoreAlignment(LD);
759*33671cefSGuillaume Chatelet           assert(Alignment && "Alignment should be set");
760*33671cefSGuillaume Chatelet           if (!TTI->isLegalNTLoad(VecTy, llvm::Align(Alignment))) {
7616452bdd2SWarren Ristow             reportVectorizationFailure(
7626452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
7636452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
764ec818d7fSHideki Saito                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
7656452bdd2SWarren Ristow             return false;
7666452bdd2SWarren Ristow           }
7676452bdd2SWarren Ristow         }
7686452bdd2SWarren Ristow 
769f2ec16ccSHideki Saito         // FP instructions can allow unsafe algebra, thus vectorizable by
770f2ec16ccSHideki Saito         // non-IEEE-754 compliant SIMD units.
771f2ec16ccSHideki Saito         // This applies to floating-point math operations and calls, not memory
772f2ec16ccSHideki Saito         // operations, shuffles, or casts, as they don't change precision or
773f2ec16ccSHideki Saito         // semantics.
774f2ec16ccSHideki Saito       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
775f2ec16ccSHideki Saito                  !I.isFast()) {
776d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
777f2ec16ccSHideki Saito         Hints->setPotentiallyUnsafe();
778f2ec16ccSHideki Saito       }
779f2ec16ccSHideki Saito 
780f2ec16ccSHideki Saito       // Reduction instructions are allowed to have exit users.
781f2ec16ccSHideki Saito       // All other instructions must not have external users.
782f2ec16ccSHideki Saito       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
783b02b0ad8SAnna Thomas         // We can safely vectorize loops where instructions within the loop are
784b02b0ad8SAnna Thomas         // used outside the loop only if the SCEV predicates within the loop is
785b02b0ad8SAnna Thomas         // same as outside the loop. Allowing the exit means reusing the SCEV
786b02b0ad8SAnna Thomas         // outside the loop.
787b02b0ad8SAnna Thomas         if (PSE.getUnionPredicate().isAlwaysTrue()) {
788b02b0ad8SAnna Thomas           AllowedExit.insert(&I);
789b02b0ad8SAnna Thomas           continue;
790b02b0ad8SAnna Thomas         }
7919e97caf5SRenato Golin         reportVectorizationFailure("Value cannot be used outside the loop",
7929e97caf5SRenato Golin                                    "value cannot be used outside the loop",
793ec818d7fSHideki Saito                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
794f2ec16ccSHideki Saito         return false;
795f2ec16ccSHideki Saito       }
796f2ec16ccSHideki Saito     } // next instr.
797f2ec16ccSHideki Saito   }
798f2ec16ccSHideki Saito 
799f2ec16ccSHideki Saito   if (!PrimaryInduction) {
800f2ec16ccSHideki Saito     if (Inductions.empty()) {
8019e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8029e97caf5SRenato Golin           "loop induction variable could not be identified",
803ec818d7fSHideki Saito           "NoInductionVariable", ORE, TheLoop);
804f2ec16ccSHideki Saito       return false;
8054f27730eSWarren Ristow     } else if (!WidestIndTy) {
8069e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8079e97caf5SRenato Golin           "integer loop induction variable could not be identified",
808ec818d7fSHideki Saito           "NoIntegerInductionVariable", ORE, TheLoop);
8094f27730eSWarren Ristow       return false;
8109e97caf5SRenato Golin     } else {
8119e97caf5SRenato Golin       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
812f2ec16ccSHideki Saito     }
813f2ec16ccSHideki Saito   }
814f2ec16ccSHideki Saito 
815f2ec16ccSHideki Saito   // Now we know the widest induction type, check if our found induction
816f2ec16ccSHideki Saito   // is the same size. If it's not, unset it here and InnerLoopVectorizer
817f2ec16ccSHideki Saito   // will create another.
818f2ec16ccSHideki Saito   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
819f2ec16ccSHideki Saito     PrimaryInduction = nullptr;
820f2ec16ccSHideki Saito 
821f2ec16ccSHideki Saito   return true;
822f2ec16ccSHideki Saito }
823f2ec16ccSHideki Saito 
824f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeMemory() {
825f2ec16ccSHideki Saito   LAI = &(*GetLAA)(*TheLoop);
826f2ec16ccSHideki Saito   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
827f2ec16ccSHideki Saito   if (LAR) {
828f2ec16ccSHideki Saito     ORE->emit([&]() {
829f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
830f2ec16ccSHideki Saito                                         "loop not vectorized: ", *LAR);
831f2ec16ccSHideki Saito     });
832f2ec16ccSHideki Saito   }
833f2ec16ccSHideki Saito   if (!LAI->canVectorizeMemory())
834f2ec16ccSHideki Saito     return false;
835f2ec16ccSHideki Saito 
8365e9215f0SAnna Thomas   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
8379e97caf5SRenato Golin     reportVectorizationFailure("Stores to a uniform address",
8389e97caf5SRenato Golin         "write to a loop invariant address could not be vectorized",
839ec818d7fSHideki Saito         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
840f2ec16ccSHideki Saito     return false;
841f2ec16ccSHideki Saito   }
842f2ec16ccSHideki Saito   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
843f2ec16ccSHideki Saito   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
844f2ec16ccSHideki Saito 
845f2ec16ccSHideki Saito   return true;
846f2ec16ccSHideki Saito }
847f2ec16ccSHideki Saito 
848f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
849f2ec16ccSHideki Saito   Value *In0 = const_cast<Value *>(V);
850f2ec16ccSHideki Saito   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
851f2ec16ccSHideki Saito   if (!PN)
852f2ec16ccSHideki Saito     return false;
853f2ec16ccSHideki Saito 
854f2ec16ccSHideki Saito   return Inductions.count(PN);
855f2ec16ccSHideki Saito }
856f2ec16ccSHideki Saito 
857f2ec16ccSHideki Saito bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
858f2ec16ccSHideki Saito   auto *Inst = dyn_cast<Instruction>(V);
859f2ec16ccSHideki Saito   return (Inst && InductionCastsToIgnore.count(Inst));
860f2ec16ccSHideki Saito }
861f2ec16ccSHideki Saito 
862f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
863f2ec16ccSHideki Saito   return isInductionPhi(V) || isCastedInductionVariable(V);
864f2ec16ccSHideki Saito }
865f2ec16ccSHideki Saito 
866f2ec16ccSHideki Saito bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
867f2ec16ccSHideki Saito   return FirstOrderRecurrences.count(Phi);
868f2ec16ccSHideki Saito }
869f2ec16ccSHideki Saito 
870f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
871f2ec16ccSHideki Saito   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
872f2ec16ccSHideki Saito }
873f2ec16ccSHideki Saito 
874f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockCanBePredicated(
875d57d73daSDorit Nuzman     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
876f2ec16ccSHideki Saito   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
877f2ec16ccSHideki Saito 
878f2ec16ccSHideki Saito   for (Instruction &I : *BB) {
879f2ec16ccSHideki Saito     // Check that we don't have a constant expression that can trap as operand.
880f2ec16ccSHideki Saito     for (Value *Operand : I.operands()) {
881f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(Operand))
882f2ec16ccSHideki Saito         if (C->canTrap())
883f2ec16ccSHideki Saito           return false;
884f2ec16ccSHideki Saito     }
885f2ec16ccSHideki Saito     // We might be able to hoist the load.
886f2ec16ccSHideki Saito     if (I.mayReadFromMemory()) {
887f2ec16ccSHideki Saito       auto *LI = dyn_cast<LoadInst>(&I);
888f2ec16ccSHideki Saito       if (!LI)
889f2ec16ccSHideki Saito         return false;
890f2ec16ccSHideki Saito       if (!SafePtrs.count(LI->getPointerOperand())) {
891f2ec16ccSHideki Saito         // !llvm.mem.parallel_loop_access implies if-conversion safety.
892f2ec16ccSHideki Saito         // Otherwise, record that the load needs (real or emulated) masking
893f2ec16ccSHideki Saito         // and let the cost model decide.
894d57d73daSDorit Nuzman         if (!IsAnnotatedParallel || PreserveGuards)
895f2ec16ccSHideki Saito           MaskedOp.insert(LI);
896f2ec16ccSHideki Saito         continue;
897f2ec16ccSHideki Saito       }
898f2ec16ccSHideki Saito     }
899f2ec16ccSHideki Saito 
900f2ec16ccSHideki Saito     if (I.mayWriteToMemory()) {
901f2ec16ccSHideki Saito       auto *SI = dyn_cast<StoreInst>(&I);
902f2ec16ccSHideki Saito       if (!SI)
903f2ec16ccSHideki Saito         return false;
904f2ec16ccSHideki Saito       // Predicated store requires some form of masking:
905f2ec16ccSHideki Saito       // 1) masked store HW instruction,
906f2ec16ccSHideki Saito       // 2) emulation via load-blend-store (only if safe and legal to do so,
907f2ec16ccSHideki Saito       //    be aware on the race conditions), or
908f2ec16ccSHideki Saito       // 3) element-by-element predicate check and scalar store.
909f2ec16ccSHideki Saito       MaskedOp.insert(SI);
910f2ec16ccSHideki Saito       continue;
911f2ec16ccSHideki Saito     }
912f2ec16ccSHideki Saito     if (I.mayThrow())
913f2ec16ccSHideki Saito       return false;
914f2ec16ccSHideki Saito   }
915f2ec16ccSHideki Saito 
916f2ec16ccSHideki Saito   return true;
917f2ec16ccSHideki Saito }
918f2ec16ccSHideki Saito 
919f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
920f2ec16ccSHideki Saito   if (!EnableIfConversion) {
9219e97caf5SRenato Golin     reportVectorizationFailure("If-conversion is disabled",
9229e97caf5SRenato Golin                                "if-conversion is disabled",
923ec818d7fSHideki Saito                                "IfConversionDisabled",
924ec818d7fSHideki Saito                                ORE, TheLoop);
925f2ec16ccSHideki Saito     return false;
926f2ec16ccSHideki Saito   }
927f2ec16ccSHideki Saito 
928f2ec16ccSHideki Saito   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
929f2ec16ccSHideki Saito 
930cf3b5559SPhilip Reames   // A list of pointers which are known to be dereferenceable within scope of
931cf3b5559SPhilip Reames   // the loop body for each iteration of the loop which executes.  That is,
932cf3b5559SPhilip Reames   // the memory pointed to can be dereferenced (with the access size implied by
933cf3b5559SPhilip Reames   // the value's type) unconditionally within the loop header without
934cf3b5559SPhilip Reames   // introducing a new fault.
935f2ec16ccSHideki Saito   SmallPtrSet<Value *, 8> SafePointes;
936f2ec16ccSHideki Saito 
937f2ec16ccSHideki Saito   // Collect safe addresses.
938f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
939f2ec16ccSHideki Saito     if (blockNeedsPredication(BB))
940f2ec16ccSHideki Saito       continue;
941f2ec16ccSHideki Saito 
942f2ec16ccSHideki Saito     for (Instruction &I : *BB)
943f2ec16ccSHideki Saito       if (auto *Ptr = getLoadStorePointerOperand(&I))
944f2ec16ccSHideki Saito         SafePointes.insert(Ptr);
945f2ec16ccSHideki Saito   }
946f2ec16ccSHideki Saito 
947f2ec16ccSHideki Saito   // Collect the blocks that need predication.
948f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
949f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
950f2ec16ccSHideki Saito     // We don't support switch statements inside loops.
951f2ec16ccSHideki Saito     if (!isa<BranchInst>(BB->getTerminator())) {
9529e97caf5SRenato Golin       reportVectorizationFailure("Loop contains a switch statement",
9539e97caf5SRenato Golin                                  "loop contains a switch statement",
954ec818d7fSHideki Saito                                  "LoopContainsSwitch", ORE, TheLoop,
955ec818d7fSHideki Saito                                  BB->getTerminator());
956f2ec16ccSHideki Saito       return false;
957f2ec16ccSHideki Saito     }
958f2ec16ccSHideki Saito 
959f2ec16ccSHideki Saito     // We must be able to predicate all blocks that need to be predicated.
960f2ec16ccSHideki Saito     if (blockNeedsPredication(BB)) {
961f2ec16ccSHideki Saito       if (!blockCanBePredicated(BB, SafePointes)) {
9629e97caf5SRenato Golin         reportVectorizationFailure(
9639e97caf5SRenato Golin             "Control flow cannot be substituted for a select",
9649e97caf5SRenato Golin             "control flow cannot be substituted for a select",
965ec818d7fSHideki Saito             "NoCFGForSelect", ORE, TheLoop,
966ec818d7fSHideki Saito             BB->getTerminator());
967f2ec16ccSHideki Saito         return false;
968f2ec16ccSHideki Saito       }
969f2ec16ccSHideki Saito     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
9709e97caf5SRenato Golin       reportVectorizationFailure(
9719e97caf5SRenato Golin           "Control flow cannot be substituted for a select",
9729e97caf5SRenato Golin           "control flow cannot be substituted for a select",
973ec818d7fSHideki Saito           "NoCFGForSelect", ORE, TheLoop,
974ec818d7fSHideki Saito           BB->getTerminator());
975f2ec16ccSHideki Saito       return false;
976f2ec16ccSHideki Saito     }
977f2ec16ccSHideki Saito   }
978f2ec16ccSHideki Saito 
979f2ec16ccSHideki Saito   // We can if-convert this loop.
980f2ec16ccSHideki Saito   return true;
981f2ec16ccSHideki Saito }
982f2ec16ccSHideki Saito 
983f2ec16ccSHideki Saito // Helper function to canVectorizeLoopNestCFG.
984f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
985f2ec16ccSHideki Saito                                                     bool UseVPlanNativePath) {
986f2ec16ccSHideki Saito   assert((UseVPlanNativePath || Lp->empty()) &&
987f2ec16ccSHideki Saito          "VPlan-native path is not enabled.");
988f2ec16ccSHideki Saito 
989f2ec16ccSHideki Saito   // TODO: ORE should be improved to show more accurate information when an
990f2ec16ccSHideki Saito   // outer loop can't be vectorized because a nested loop is not understood or
991f2ec16ccSHideki Saito   // legal. Something like: "outer_loop_location: loop not vectorized:
992f2ec16ccSHideki Saito   // (inner_loop_location) loop control flow is not understood by vectorizer".
993f2ec16ccSHideki Saito 
994f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
995f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
996f2ec16ccSHideki Saito   bool Result = true;
997f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
998f2ec16ccSHideki Saito 
999f2ec16ccSHideki Saito   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1000f2ec16ccSHideki Saito   // be canonicalized.
1001f2ec16ccSHideki Saito   if (!Lp->getLoopPreheader()) {
10029e97caf5SRenato Golin     reportVectorizationFailure("Loop doesn't have a legal pre-header",
10039e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1004ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1005f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1006f2ec16ccSHideki Saito       Result = false;
1007f2ec16ccSHideki Saito     else
1008f2ec16ccSHideki Saito       return false;
1009f2ec16ccSHideki Saito   }
1010f2ec16ccSHideki Saito 
1011f2ec16ccSHideki Saito   // We must have a single backedge.
1012f2ec16ccSHideki Saito   if (Lp->getNumBackEdges() != 1) {
10139e97caf5SRenato Golin     reportVectorizationFailure("The loop must have a single backedge",
10149e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1015ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1016f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1017f2ec16ccSHideki Saito       Result = false;
1018f2ec16ccSHideki Saito     else
1019f2ec16ccSHideki Saito       return false;
1020f2ec16ccSHideki Saito   }
1021f2ec16ccSHideki Saito 
1022f2ec16ccSHideki Saito   // We must have a single exiting block.
1023f2ec16ccSHideki Saito   if (!Lp->getExitingBlock()) {
10249e97caf5SRenato Golin     reportVectorizationFailure("The loop must have an exiting block",
10259e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1026ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1027f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1028f2ec16ccSHideki Saito       Result = false;
1029f2ec16ccSHideki Saito     else
1030f2ec16ccSHideki Saito       return false;
1031f2ec16ccSHideki Saito   }
1032f2ec16ccSHideki Saito 
1033f2ec16ccSHideki Saito   // We only handle bottom-tested loops, i.e. loop in which the condition is
1034f2ec16ccSHideki Saito   // checked at the end of each iteration. With that we can assume that all
1035f2ec16ccSHideki Saito   // instructions in the loop are executed the same number of times.
1036f2ec16ccSHideki Saito   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
10379e97caf5SRenato Golin     reportVectorizationFailure("The exiting block is not the loop latch",
10389e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1039ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1040f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1041f2ec16ccSHideki Saito       Result = false;
1042f2ec16ccSHideki Saito     else
1043f2ec16ccSHideki Saito       return false;
1044f2ec16ccSHideki Saito   }
1045f2ec16ccSHideki Saito 
1046f2ec16ccSHideki Saito   return Result;
1047f2ec16ccSHideki Saito }
1048f2ec16ccSHideki Saito 
1049f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1050f2ec16ccSHideki Saito     Loop *Lp, bool UseVPlanNativePath) {
1051f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1052f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1053f2ec16ccSHideki Saito   bool Result = true;
1054f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1055f2ec16ccSHideki Saito   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1056f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1057f2ec16ccSHideki Saito       Result = false;
1058f2ec16ccSHideki Saito     else
1059f2ec16ccSHideki Saito       return false;
1060f2ec16ccSHideki Saito   }
1061f2ec16ccSHideki Saito 
1062f2ec16ccSHideki Saito   // Recursively check whether the loop control flow of nested loops is
1063f2ec16ccSHideki Saito   // understood.
1064f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
1065f2ec16ccSHideki Saito     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1066f2ec16ccSHideki Saito       if (DoExtraAnalysis)
1067f2ec16ccSHideki Saito         Result = false;
1068f2ec16ccSHideki Saito       else
1069f2ec16ccSHideki Saito         return false;
1070f2ec16ccSHideki Saito     }
1071f2ec16ccSHideki Saito 
1072f2ec16ccSHideki Saito   return Result;
1073f2ec16ccSHideki Saito }
1074f2ec16ccSHideki Saito 
1075f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1076f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1077f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1078f2ec16ccSHideki Saito   bool Result = true;
1079f2ec16ccSHideki Saito 
1080f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1081f2ec16ccSHideki Saito   // Check whether the loop-related control flow in the loop nest is expected by
1082f2ec16ccSHideki Saito   // vectorizer.
1083f2ec16ccSHideki Saito   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1084f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1085f2ec16ccSHideki Saito       Result = false;
1086f2ec16ccSHideki Saito     else
1087f2ec16ccSHideki Saito       return false;
1088f2ec16ccSHideki Saito   }
1089f2ec16ccSHideki Saito 
1090f2ec16ccSHideki Saito   // We need to have a loop header.
1091d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1092f2ec16ccSHideki Saito                     << '\n');
1093f2ec16ccSHideki Saito 
1094f2ec16ccSHideki Saito   // Specific checks for outer loops. We skip the remaining legal checks at this
1095f2ec16ccSHideki Saito   // point because they don't support outer loops.
1096f2ec16ccSHideki Saito   if (!TheLoop->empty()) {
1097f2ec16ccSHideki Saito     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1098f2ec16ccSHideki Saito 
1099f2ec16ccSHideki Saito     if (!canVectorizeOuterLoop()) {
11009e97caf5SRenato Golin       reportVectorizationFailure("Unsupported outer loop",
11019e97caf5SRenato Golin                                  "unsupported outer loop",
1102ec818d7fSHideki Saito                                  "UnsupportedOuterLoop",
1103ec818d7fSHideki Saito                                  ORE, TheLoop);
1104f2ec16ccSHideki Saito       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1105f2ec16ccSHideki Saito       // outer loops.
1106f2ec16ccSHideki Saito       return false;
1107f2ec16ccSHideki Saito     }
1108f2ec16ccSHideki Saito 
1109d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1110f2ec16ccSHideki Saito     return Result;
1111f2ec16ccSHideki Saito   }
1112f2ec16ccSHideki Saito 
1113f2ec16ccSHideki Saito   assert(TheLoop->empty() && "Inner loop expected.");
1114f2ec16ccSHideki Saito   // Check if we can if-convert non-single-bb loops.
1115f2ec16ccSHideki Saito   unsigned NumBlocks = TheLoop->getNumBlocks();
1116f2ec16ccSHideki Saito   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1117d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1118f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1119f2ec16ccSHideki Saito       Result = false;
1120f2ec16ccSHideki Saito     else
1121f2ec16ccSHideki Saito       return false;
1122f2ec16ccSHideki Saito   }
1123f2ec16ccSHideki Saito 
1124f2ec16ccSHideki Saito   // Check if we can vectorize the instructions and CFG in this loop.
1125f2ec16ccSHideki Saito   if (!canVectorizeInstrs()) {
1126d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1127f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1128f2ec16ccSHideki Saito       Result = false;
1129f2ec16ccSHideki Saito     else
1130f2ec16ccSHideki Saito       return false;
1131f2ec16ccSHideki Saito   }
1132f2ec16ccSHideki Saito 
1133f2ec16ccSHideki Saito   // Go over each instruction and look at memory deps.
1134f2ec16ccSHideki Saito   if (!canVectorizeMemory()) {
1135d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1136f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1137f2ec16ccSHideki Saito       Result = false;
1138f2ec16ccSHideki Saito     else
1139f2ec16ccSHideki Saito       return false;
1140f2ec16ccSHideki Saito   }
1141f2ec16ccSHideki Saito 
1142d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1143f2ec16ccSHideki Saito                     << (LAI->getRuntimePointerChecking()->Need
1144f2ec16ccSHideki Saito                             ? " (with a runtime bound check)"
1145f2ec16ccSHideki Saito                             : "")
1146f2ec16ccSHideki Saito                     << "!\n");
1147f2ec16ccSHideki Saito 
1148f2ec16ccSHideki Saito   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1149f2ec16ccSHideki Saito   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1150f2ec16ccSHideki Saito     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1151f2ec16ccSHideki Saito 
1152f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
11539e97caf5SRenato Golin     reportVectorizationFailure("Too many SCEV checks needed",
11549e97caf5SRenato Golin         "Too many SCEV assumptions need to be made and checked at runtime",
1155ec818d7fSHideki Saito         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1156f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1157f2ec16ccSHideki Saito       Result = false;
1158f2ec16ccSHideki Saito     else
1159f2ec16ccSHideki Saito       return false;
1160f2ec16ccSHideki Saito   }
1161f2ec16ccSHideki Saito 
1162f2ec16ccSHideki Saito   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1163f2ec16ccSHideki Saito   // we can vectorize, and at this point we don't have any other mem analysis
1164f2ec16ccSHideki Saito   // which may limit our maximum vectorization factor, so just return true with
1165f2ec16ccSHideki Saito   // no restrictions.
1166f2ec16ccSHideki Saito   return Result;
1167f2ec16ccSHideki Saito }
1168f2ec16ccSHideki Saito 
1169d57d73daSDorit Nuzman bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1170b0b5312eSAyal Zaks 
1171b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1172b0b5312eSAyal Zaks 
1173b0b5312eSAyal Zaks   if (!PrimaryInduction) {
11749e97caf5SRenato Golin     reportVectorizationFailure(
11759e97caf5SRenato Golin         "No primary induction, cannot fold tail by masking",
11769e97caf5SRenato Golin         "Missing a primary induction variable in the loop, which is "
11779e97caf5SRenato Golin         "needed in order to fold tail by masking as required.",
1178ec818d7fSHideki Saito         "NoPrimaryInduction", ORE, TheLoop);
1179b0b5312eSAyal Zaks     return false;
1180b0b5312eSAyal Zaks   }
1181b0b5312eSAyal Zaks 
1182d15df0edSAyal Zaks   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1183b0b5312eSAyal Zaks 
1184d15df0edSAyal Zaks   for (auto &Reduction : *getReductionVars())
1185d15df0edSAyal Zaks     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1186d15df0edSAyal Zaks 
1187d15df0edSAyal Zaks   // TODO: handle non-reduction outside users when tail is folded by masking.
1188b0b5312eSAyal Zaks   for (auto *AE : AllowedExit) {
1189d15df0edSAyal Zaks     // Check that all users of allowed exit values are inside the loop or
1190d15df0edSAyal Zaks     // are the live-out of a reduction.
1191d15df0edSAyal Zaks     if (ReductionLiveOuts.count(AE))
1192d15df0edSAyal Zaks       continue;
1193b0b5312eSAyal Zaks     for (User *U : AE->users()) {
1194b0b5312eSAyal Zaks       Instruction *UI = cast<Instruction>(U);
1195b0b5312eSAyal Zaks       if (TheLoop->contains(UI))
1196b0b5312eSAyal Zaks         continue;
11979e97caf5SRenato Golin       reportVectorizationFailure(
11989e97caf5SRenato Golin           "Cannot fold tail by masking, loop has an outside user for",
11999e97caf5SRenato Golin           "Cannot fold tail by masking in the presence of live outs.",
1200ec818d7fSHideki Saito           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1201b0b5312eSAyal Zaks       return false;
1202b0b5312eSAyal Zaks     }
1203b0b5312eSAyal Zaks   }
1204b0b5312eSAyal Zaks 
1205b0b5312eSAyal Zaks   // The list of pointers that we can safely read and write to remains empty.
1206b0b5312eSAyal Zaks   SmallPtrSet<Value *, 8> SafePointers;
1207b0b5312eSAyal Zaks 
1208b0b5312eSAyal Zaks   // Check and mark all blocks for predication, including those that ordinarily
1209b0b5312eSAyal Zaks   // do not need predication such as the header block.
1210b0b5312eSAyal Zaks   for (BasicBlock *BB : TheLoop->blocks()) {
1211d57d73daSDorit Nuzman     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
12129e97caf5SRenato Golin       reportVectorizationFailure(
12139e97caf5SRenato Golin           "Cannot fold tail by masking as required",
12149e97caf5SRenato Golin           "control flow cannot be substituted for a select",
1215ec818d7fSHideki Saito           "NoCFGForSelect", ORE, TheLoop,
1216ec818d7fSHideki Saito           BB->getTerminator());
1217b0b5312eSAyal Zaks       return false;
1218b0b5312eSAyal Zaks     }
1219b0b5312eSAyal Zaks   }
1220b0b5312eSAyal Zaks 
1221b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1222b0b5312eSAyal Zaks   return true;
1223b0b5312eSAyal Zaks }
1224b0b5312eSAyal Zaks 
1225f2ec16ccSHideki Saito } // namespace llvm
1226