1f2ec16ccSHideki Saito //===- LoopVectorizationLegality.cpp --------------------------------------===//
2f2ec16ccSHideki Saito //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6f2ec16ccSHideki Saito //
7f2ec16ccSHideki Saito //===----------------------------------------------------------------------===//
8f2ec16ccSHideki Saito //
9f2ec16ccSHideki Saito // This file provides loop vectorization legality analysis. Original code
10f2ec16ccSHideki Saito // resided in LoopVectorize.cpp for a long time.
11f2ec16ccSHideki Saito //
12f2ec16ccSHideki Saito // At this point, it is implemented as a utility class, not as an analysis
13f2ec16ccSHideki Saito // pass. It should be easy to create an analysis pass around it if there
14f2ec16ccSHideki Saito // is a need (but D45420 needs to happen first).
15f2ec16ccSHideki Saito //
16f2ec16ccSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
177403569bSPhilip Reames #include "llvm/Analysis/Loads.h"
187403569bSPhilip Reames #include "llvm/Analysis/ValueTracking.h"
19f2ec16ccSHideki Saito #include "llvm/Analysis/VectorUtils.h"
20f2ec16ccSHideki Saito #include "llvm/IR/IntrinsicInst.h"
21*23c11380SFlorian Hahn #include "llvm/IR/PatternMatch.h"
22*23c11380SFlorian Hahn #include "llvm/Transforms/Vectorize/LoopVectorize.h"
23f2ec16ccSHideki Saito 
24f2ec16ccSHideki Saito using namespace llvm;
25*23c11380SFlorian Hahn using namespace PatternMatch;
26f2ec16ccSHideki Saito 
27f2ec16ccSHideki Saito #define LV_NAME "loop-vectorize"
28f2ec16ccSHideki Saito #define DEBUG_TYPE LV_NAME
29f2ec16ccSHideki Saito 
304e4ecae0SHideki Saito extern cl::opt<bool> EnableVPlanPredication;
314e4ecae0SHideki Saito 
32f2ec16ccSHideki Saito static cl::opt<bool>
33f2ec16ccSHideki Saito     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
34f2ec16ccSHideki Saito                        cl::desc("Enable if-conversion during vectorization."));
35f2ec16ccSHideki Saito 
36f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
37f2ec16ccSHideki Saito     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
38f2ec16ccSHideki Saito     cl::desc("The maximum allowed number of runtime memory checks with a "
39f2ec16ccSHideki Saito              "vectorize(enable) pragma."));
40f2ec16ccSHideki Saito 
41f2ec16ccSHideki Saito static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
42f2ec16ccSHideki Saito     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
43f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed."));
44f2ec16ccSHideki Saito 
45f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
46f2ec16ccSHideki Saito     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
47f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed with a "
48f2ec16ccSHideki Saito              "vectorize(enable) pragma"));
49f2ec16ccSHideki Saito 
50f2ec16ccSHideki Saito /// Maximum vectorization interleave count.
51f2ec16ccSHideki Saito static const unsigned MaxInterleaveFactor = 16;
52f2ec16ccSHideki Saito 
53f2ec16ccSHideki Saito namespace llvm {
54f2ec16ccSHideki Saito 
55f2ec16ccSHideki Saito bool LoopVectorizeHints::Hint::validate(unsigned Val) {
56f2ec16ccSHideki Saito   switch (Kind) {
57f2ec16ccSHideki Saito   case HK_WIDTH:
58f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
59f2ec16ccSHideki Saito   case HK_UNROLL:
60f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
61f2ec16ccSHideki Saito   case HK_FORCE:
62f2ec16ccSHideki Saito     return (Val <= 1);
63f2ec16ccSHideki Saito   case HK_ISVECTORIZED:
6420b198ecSSjoerd Meijer   case HK_PREDICATE:
65f2ec16ccSHideki Saito     return (Val == 0 || Val == 1);
66f2ec16ccSHideki Saito   }
67f2ec16ccSHideki Saito   return false;
68f2ec16ccSHideki Saito }
69f2ec16ccSHideki Saito 
70d4eb13c8SMichael Kruse LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
71d4eb13c8SMichael Kruse                                        bool InterleaveOnlyWhenForced,
72f2ec16ccSHideki Saito                                        OptimizationRemarkEmitter &ORE)
73f2ec16ccSHideki Saito     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
74d4eb13c8SMichael Kruse       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
75f2ec16ccSHideki Saito       Force("vectorize.enable", FK_Undefined, HK_FORCE),
7620b198ecSSjoerd Meijer       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
77cb47b878SSjoerd Meijer       Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
7820b198ecSSjoerd Meijer       ORE(ORE) {
79f2ec16ccSHideki Saito   // Populate values with existing loop metadata.
80f2ec16ccSHideki Saito   getHintsFromMetadata();
81f2ec16ccSHideki Saito 
82f2ec16ccSHideki Saito   // force-vector-interleave overrides DisableInterleaving.
83f2ec16ccSHideki Saito   if (VectorizerParams::isInterleaveForced())
84f2ec16ccSHideki Saito     Interleave.Value = VectorizerParams::VectorizationInterleave;
85f2ec16ccSHideki Saito 
86f2ec16ccSHideki Saito   if (IsVectorized.Value != 1)
87f2ec16ccSHideki Saito     // If the vectorization width and interleaving count are both 1 then
88f2ec16ccSHideki Saito     // consider the loop to have been already vectorized because there's
89f2ec16ccSHideki Saito     // nothing more that we can do.
90f2ec16ccSHideki Saito     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
91d4eb13c8SMichael Kruse   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
92f2ec16ccSHideki Saito              << "LV: Interleaving disabled by the pass manager\n");
93f2ec16ccSHideki Saito }
94f2ec16ccSHideki Saito 
9577a614a6SMichael Kruse void LoopVectorizeHints::setAlreadyVectorized() {
9677a614a6SMichael Kruse   LLVMContext &Context = TheLoop->getHeader()->getContext();
9777a614a6SMichael Kruse 
9877a614a6SMichael Kruse   MDNode *IsVectorizedMD = MDNode::get(
9977a614a6SMichael Kruse       Context,
10077a614a6SMichael Kruse       {MDString::get(Context, "llvm.loop.isvectorized"),
10177a614a6SMichael Kruse        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
10277a614a6SMichael Kruse   MDNode *LoopID = TheLoop->getLoopID();
10377a614a6SMichael Kruse   MDNode *NewLoopID =
10477a614a6SMichael Kruse       makePostTransformationMetadata(Context, LoopID,
10577a614a6SMichael Kruse                                      {Twine(Prefix(), "vectorize.").str(),
10677a614a6SMichael Kruse                                       Twine(Prefix(), "interleave.").str()},
10777a614a6SMichael Kruse                                      {IsVectorizedMD});
10877a614a6SMichael Kruse   TheLoop->setLoopID(NewLoopID);
10977a614a6SMichael Kruse 
11077a614a6SMichael Kruse   // Update internal cache.
11177a614a6SMichael Kruse   IsVectorized.Value = 1;
11277a614a6SMichael Kruse }
11377a614a6SMichael Kruse 
114d4eb13c8SMichael Kruse bool LoopVectorizeHints::allowVectorization(
115d4eb13c8SMichael Kruse     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
116f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled) {
117d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
118f2ec16ccSHideki Saito     emitRemarkWithHints();
119f2ec16ccSHideki Saito     return false;
120f2ec16ccSHideki Saito   }
121f2ec16ccSHideki Saito 
122d4eb13c8SMichael Kruse   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
123d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
124f2ec16ccSHideki Saito     emitRemarkWithHints();
125f2ec16ccSHideki Saito     return false;
126f2ec16ccSHideki Saito   }
127f2ec16ccSHideki Saito 
128f2ec16ccSHideki Saito   if (getIsVectorized() == 1) {
129d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
130f2ec16ccSHideki Saito     // FIXME: Add interleave.disable metadata. This will allow
131f2ec16ccSHideki Saito     // vectorize.disable to be used without disabling the pass and errors
132f2ec16ccSHideki Saito     // to differentiate between disabled vectorization and a width of 1.
133f2ec16ccSHideki Saito     ORE.emit([&]() {
134f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
135f2ec16ccSHideki Saito                                         "AllDisabled", L->getStartLoc(),
136f2ec16ccSHideki Saito                                         L->getHeader())
137f2ec16ccSHideki Saito              << "loop not vectorized: vectorization and interleaving are "
138f2ec16ccSHideki Saito                 "explicitly disabled, or the loop has already been "
139f2ec16ccSHideki Saito                 "vectorized";
140f2ec16ccSHideki Saito     });
141f2ec16ccSHideki Saito     return false;
142f2ec16ccSHideki Saito   }
143f2ec16ccSHideki Saito 
144f2ec16ccSHideki Saito   return true;
145f2ec16ccSHideki Saito }
146f2ec16ccSHideki Saito 
147f2ec16ccSHideki Saito void LoopVectorizeHints::emitRemarkWithHints() const {
148f2ec16ccSHideki Saito   using namespace ore;
149f2ec16ccSHideki Saito 
150f2ec16ccSHideki Saito   ORE.emit([&]() {
151f2ec16ccSHideki Saito     if (Force.Value == LoopVectorizeHints::FK_Disabled)
152f2ec16ccSHideki Saito       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
153f2ec16ccSHideki Saito                                       TheLoop->getStartLoc(),
154f2ec16ccSHideki Saito                                       TheLoop->getHeader())
155f2ec16ccSHideki Saito              << "loop not vectorized: vectorization is explicitly disabled";
156f2ec16ccSHideki Saito     else {
157f2ec16ccSHideki Saito       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
158f2ec16ccSHideki Saito                                  TheLoop->getStartLoc(), TheLoop->getHeader());
159f2ec16ccSHideki Saito       R << "loop not vectorized";
160f2ec16ccSHideki Saito       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
161f2ec16ccSHideki Saito         R << " (Force=" << NV("Force", true);
162f2ec16ccSHideki Saito         if (Width.Value != 0)
163f2ec16ccSHideki Saito           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
164f2ec16ccSHideki Saito         if (Interleave.Value != 0)
165f2ec16ccSHideki Saito           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
166f2ec16ccSHideki Saito         R << ")";
167f2ec16ccSHideki Saito       }
168f2ec16ccSHideki Saito       return R;
169f2ec16ccSHideki Saito     }
170f2ec16ccSHideki Saito   });
171f2ec16ccSHideki Saito }
172f2ec16ccSHideki Saito 
173f2ec16ccSHideki Saito const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
174f2ec16ccSHideki Saito   if (getWidth() == 1)
175f2ec16ccSHideki Saito     return LV_NAME;
176f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled)
177f2ec16ccSHideki Saito     return LV_NAME;
178f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
179f2ec16ccSHideki Saito     return LV_NAME;
180f2ec16ccSHideki Saito   return OptimizationRemarkAnalysis::AlwaysPrint;
181f2ec16ccSHideki Saito }
182f2ec16ccSHideki Saito 
183f2ec16ccSHideki Saito void LoopVectorizeHints::getHintsFromMetadata() {
184f2ec16ccSHideki Saito   MDNode *LoopID = TheLoop->getLoopID();
185f2ec16ccSHideki Saito   if (!LoopID)
186f2ec16ccSHideki Saito     return;
187f2ec16ccSHideki Saito 
188f2ec16ccSHideki Saito   // First operand should refer to the loop id itself.
189f2ec16ccSHideki Saito   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
190f2ec16ccSHideki Saito   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
191f2ec16ccSHideki Saito 
192f2ec16ccSHideki Saito   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
193f2ec16ccSHideki Saito     const MDString *S = nullptr;
194f2ec16ccSHideki Saito     SmallVector<Metadata *, 4> Args;
195f2ec16ccSHideki Saito 
196f2ec16ccSHideki Saito     // The expected hint is either a MDString or a MDNode with the first
197f2ec16ccSHideki Saito     // operand a MDString.
198f2ec16ccSHideki Saito     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
199f2ec16ccSHideki Saito       if (!MD || MD->getNumOperands() == 0)
200f2ec16ccSHideki Saito         continue;
201f2ec16ccSHideki Saito       S = dyn_cast<MDString>(MD->getOperand(0));
202f2ec16ccSHideki Saito       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
203f2ec16ccSHideki Saito         Args.push_back(MD->getOperand(i));
204f2ec16ccSHideki Saito     } else {
205f2ec16ccSHideki Saito       S = dyn_cast<MDString>(LoopID->getOperand(i));
206f2ec16ccSHideki Saito       assert(Args.size() == 0 && "too many arguments for MDString");
207f2ec16ccSHideki Saito     }
208f2ec16ccSHideki Saito 
209f2ec16ccSHideki Saito     if (!S)
210f2ec16ccSHideki Saito       continue;
211f2ec16ccSHideki Saito 
212f2ec16ccSHideki Saito     // Check if the hint starts with the loop metadata prefix.
213f2ec16ccSHideki Saito     StringRef Name = S->getString();
214f2ec16ccSHideki Saito     if (Args.size() == 1)
215f2ec16ccSHideki Saito       setHint(Name, Args[0]);
216f2ec16ccSHideki Saito   }
217f2ec16ccSHideki Saito }
218f2ec16ccSHideki Saito 
219f2ec16ccSHideki Saito void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
220f2ec16ccSHideki Saito   if (!Name.startswith(Prefix()))
221f2ec16ccSHideki Saito     return;
222f2ec16ccSHideki Saito   Name = Name.substr(Prefix().size(), StringRef::npos);
223f2ec16ccSHideki Saito 
224f2ec16ccSHideki Saito   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
225f2ec16ccSHideki Saito   if (!C)
226f2ec16ccSHideki Saito     return;
227f2ec16ccSHideki Saito   unsigned Val = C->getZExtValue();
228f2ec16ccSHideki Saito 
22920b198ecSSjoerd Meijer   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
230f2ec16ccSHideki Saito   for (auto H : Hints) {
231f2ec16ccSHideki Saito     if (Name == H->Name) {
232f2ec16ccSHideki Saito       if (H->validate(Val))
233f2ec16ccSHideki Saito         H->Value = Val;
234f2ec16ccSHideki Saito       else
235d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
236f2ec16ccSHideki Saito       break;
237f2ec16ccSHideki Saito     }
238f2ec16ccSHideki Saito   }
239f2ec16ccSHideki Saito }
240f2ec16ccSHideki Saito 
241f2ec16ccSHideki Saito bool LoopVectorizationRequirements::doesNotMeet(
242f2ec16ccSHideki Saito     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
243f2ec16ccSHideki Saito   const char *PassName = Hints.vectorizeAnalysisPassName();
244f2ec16ccSHideki Saito   bool Failed = false;
245f2ec16ccSHideki Saito   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
246f2ec16ccSHideki Saito     ORE.emit([&]() {
247f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisFPCommute(
248f2ec16ccSHideki Saito                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
249f2ec16ccSHideki Saito                  UnsafeAlgebraInst->getParent())
250f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
251f2ec16ccSHideki Saito                 "floating-point operations";
252f2ec16ccSHideki Saito     });
253f2ec16ccSHideki Saito     Failed = true;
254f2ec16ccSHideki Saito   }
255f2ec16ccSHideki Saito 
256f2ec16ccSHideki Saito   // Test if runtime memcheck thresholds are exceeded.
257f2ec16ccSHideki Saito   bool PragmaThresholdReached =
258f2ec16ccSHideki Saito       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
259f2ec16ccSHideki Saito   bool ThresholdReached =
260f2ec16ccSHideki Saito       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
261f2ec16ccSHideki Saito   if ((ThresholdReached && !Hints.allowReordering()) ||
262f2ec16ccSHideki Saito       PragmaThresholdReached) {
263f2ec16ccSHideki Saito     ORE.emit([&]() {
264f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
265f2ec16ccSHideki Saito                                                 L->getStartLoc(),
266f2ec16ccSHideki Saito                                                 L->getHeader())
267f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
268f2ec16ccSHideki Saito                 "memory operations";
269f2ec16ccSHideki Saito     });
270d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
271f2ec16ccSHideki Saito     Failed = true;
272f2ec16ccSHideki Saito   }
273f2ec16ccSHideki Saito 
274f2ec16ccSHideki Saito   return Failed;
275f2ec16ccSHideki Saito }
276f2ec16ccSHideki Saito 
277f2ec16ccSHideki Saito // Return true if the inner loop \p Lp is uniform with regard to the outer loop
278f2ec16ccSHideki Saito // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
279f2ec16ccSHideki Saito // executing the inner loop will execute the same iterations). This check is
280f2ec16ccSHideki Saito // very constrained for now but it will be relaxed in the future. \p Lp is
281f2ec16ccSHideki Saito // considered uniform if it meets all the following conditions:
282f2ec16ccSHideki Saito //   1) it has a canonical IV (starting from 0 and with stride 1),
283f2ec16ccSHideki Saito //   2) its latch terminator is a conditional branch and,
284f2ec16ccSHideki Saito //   3) its latch condition is a compare instruction whose operands are the
285f2ec16ccSHideki Saito //      canonical IV and an OuterLp invariant.
286f2ec16ccSHideki Saito // This check doesn't take into account the uniformity of other conditions not
287f2ec16ccSHideki Saito // related to the loop latch because they don't affect the loop uniformity.
288f2ec16ccSHideki Saito //
289f2ec16ccSHideki Saito // NOTE: We decided to keep all these checks and its associated documentation
290f2ec16ccSHideki Saito // together so that we can easily have a picture of the current supported loop
291f2ec16ccSHideki Saito // nests. However, some of the current checks don't depend on \p OuterLp and
292f2ec16ccSHideki Saito // would be redundantly executed for each \p Lp if we invoked this function for
293f2ec16ccSHideki Saito // different candidate outer loops. This is not the case for now because we
294f2ec16ccSHideki Saito // don't currently have the infrastructure to evaluate multiple candidate outer
295f2ec16ccSHideki Saito // loops and \p OuterLp will be a fixed parameter while we only support explicit
296f2ec16ccSHideki Saito // outer loop vectorization. It's also very likely that these checks go away
297f2ec16ccSHideki Saito // before introducing the aforementioned infrastructure. However, if this is not
298f2ec16ccSHideki Saito // the case, we should move the \p OuterLp independent checks to a separate
299f2ec16ccSHideki Saito // function that is only executed once for each \p Lp.
300f2ec16ccSHideki Saito static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
301f2ec16ccSHideki Saito   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
302f2ec16ccSHideki Saito 
303f2ec16ccSHideki Saito   // If Lp is the outer loop, it's uniform by definition.
304f2ec16ccSHideki Saito   if (Lp == OuterLp)
305f2ec16ccSHideki Saito     return true;
306f2ec16ccSHideki Saito   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
307f2ec16ccSHideki Saito 
308f2ec16ccSHideki Saito   // 1.
309f2ec16ccSHideki Saito   PHINode *IV = Lp->getCanonicalInductionVariable();
310f2ec16ccSHideki Saito   if (!IV) {
311d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
312f2ec16ccSHideki Saito     return false;
313f2ec16ccSHideki Saito   }
314f2ec16ccSHideki Saito 
315f2ec16ccSHideki Saito   // 2.
316f2ec16ccSHideki Saito   BasicBlock *Latch = Lp->getLoopLatch();
317f2ec16ccSHideki Saito   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
318f2ec16ccSHideki Saito   if (!LatchBr || LatchBr->isUnconditional()) {
319d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
320f2ec16ccSHideki Saito     return false;
321f2ec16ccSHideki Saito   }
322f2ec16ccSHideki Saito 
323f2ec16ccSHideki Saito   // 3.
324f2ec16ccSHideki Saito   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
325f2ec16ccSHideki Saito   if (!LatchCmp) {
326d34e60caSNicola Zaghen     LLVM_DEBUG(
327d34e60caSNicola Zaghen         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
328f2ec16ccSHideki Saito     return false;
329f2ec16ccSHideki Saito   }
330f2ec16ccSHideki Saito 
331f2ec16ccSHideki Saito   Value *CondOp0 = LatchCmp->getOperand(0);
332f2ec16ccSHideki Saito   Value *CondOp1 = LatchCmp->getOperand(1);
333f2ec16ccSHideki Saito   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
334f2ec16ccSHideki Saito   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
335f2ec16ccSHideki Saito       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
336d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
337f2ec16ccSHideki Saito     return false;
338f2ec16ccSHideki Saito   }
339f2ec16ccSHideki Saito 
340f2ec16ccSHideki Saito   return true;
341f2ec16ccSHideki Saito }
342f2ec16ccSHideki Saito 
343f2ec16ccSHideki Saito // Return true if \p Lp and all its nested loops are uniform with regard to \p
344f2ec16ccSHideki Saito // OuterLp.
345f2ec16ccSHideki Saito static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
346f2ec16ccSHideki Saito   if (!isUniformLoop(Lp, OuterLp))
347f2ec16ccSHideki Saito     return false;
348f2ec16ccSHideki Saito 
349f2ec16ccSHideki Saito   // Check if nested loops are uniform.
350f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
351f2ec16ccSHideki Saito     if (!isUniformLoopNest(SubLp, OuterLp))
352f2ec16ccSHideki Saito       return false;
353f2ec16ccSHideki Saito 
354f2ec16ccSHideki Saito   return true;
355f2ec16ccSHideki Saito }
356f2ec16ccSHideki Saito 
3575f8f34e4SAdrian Prantl /// Check whether it is safe to if-convert this phi node.
358f2ec16ccSHideki Saito ///
359f2ec16ccSHideki Saito /// Phi nodes with constant expressions that can trap are not safe to if
360f2ec16ccSHideki Saito /// convert.
361f2ec16ccSHideki Saito static bool canIfConvertPHINodes(BasicBlock *BB) {
362f2ec16ccSHideki Saito   for (PHINode &Phi : BB->phis()) {
363f2ec16ccSHideki Saito     for (Value *V : Phi.incoming_values())
364f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(V))
365f2ec16ccSHideki Saito         if (C->canTrap())
366f2ec16ccSHideki Saito           return false;
367f2ec16ccSHideki Saito   }
368f2ec16ccSHideki Saito   return true;
369f2ec16ccSHideki Saito }
370f2ec16ccSHideki Saito 
371f2ec16ccSHideki Saito static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
372f2ec16ccSHideki Saito   if (Ty->isPointerTy())
373f2ec16ccSHideki Saito     return DL.getIntPtrType(Ty);
374f2ec16ccSHideki Saito 
375f2ec16ccSHideki Saito   // It is possible that char's or short's overflow when we ask for the loop's
376f2ec16ccSHideki Saito   // trip count, work around this by changing the type size.
377f2ec16ccSHideki Saito   if (Ty->getScalarSizeInBits() < 32)
378f2ec16ccSHideki Saito     return Type::getInt32Ty(Ty->getContext());
379f2ec16ccSHideki Saito 
380f2ec16ccSHideki Saito   return Ty;
381f2ec16ccSHideki Saito }
382f2ec16ccSHideki Saito 
383f2ec16ccSHideki Saito static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
384f2ec16ccSHideki Saito   Ty0 = convertPointerToIntegerType(DL, Ty0);
385f2ec16ccSHideki Saito   Ty1 = convertPointerToIntegerType(DL, Ty1);
386f2ec16ccSHideki Saito   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
387f2ec16ccSHideki Saito     return Ty0;
388f2ec16ccSHideki Saito   return Ty1;
389f2ec16ccSHideki Saito }
390f2ec16ccSHideki Saito 
3915f8f34e4SAdrian Prantl /// Check that the instruction has outside loop users and is not an
392f2ec16ccSHideki Saito /// identified reduction variable.
393f2ec16ccSHideki Saito static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
394f2ec16ccSHideki Saito                                SmallPtrSetImpl<Value *> &AllowedExit) {
39560a1e4ddSAnna Thomas   // Reductions, Inductions and non-header phis are allowed to have exit users. All
396f2ec16ccSHideki Saito   // other instructions must not have external users.
397f2ec16ccSHideki Saito   if (!AllowedExit.count(Inst))
398f2ec16ccSHideki Saito     // Check that all of the users of the loop are inside the BB.
399f2ec16ccSHideki Saito     for (User *U : Inst->users()) {
400f2ec16ccSHideki Saito       Instruction *UI = cast<Instruction>(U);
401f2ec16ccSHideki Saito       // This user may be a reduction exit value.
402f2ec16ccSHideki Saito       if (!TheLoop->contains(UI)) {
403d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
404f2ec16ccSHideki Saito         return true;
405f2ec16ccSHideki Saito       }
406f2ec16ccSHideki Saito     }
407f2ec16ccSHideki Saito   return false;
408f2ec16ccSHideki Saito }
409f2ec16ccSHideki Saito 
410f2ec16ccSHideki Saito int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
411f2ec16ccSHideki Saito   const ValueToValueMap &Strides =
412f2ec16ccSHideki Saito       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
413f2ec16ccSHideki Saito 
414d1170dbeSSjoerd Meijer   bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();
415d1170dbeSSjoerd Meijer   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
416f2ec16ccSHideki Saito   if (Stride == 1 || Stride == -1)
417f2ec16ccSHideki Saito     return Stride;
418f2ec16ccSHideki Saito   return 0;
419f2ec16ccSHideki Saito }
420f2ec16ccSHideki Saito 
421f2ec16ccSHideki Saito bool LoopVectorizationLegality::isUniform(Value *V) {
422f2ec16ccSHideki Saito   return LAI->isUniform(V);
423f2ec16ccSHideki Saito }
424f2ec16ccSHideki Saito 
425f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeOuterLoop() {
426f2ec16ccSHideki Saito   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
427f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
428f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
429f2ec16ccSHideki Saito   bool Result = true;
430f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
431f2ec16ccSHideki Saito 
432f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
433f2ec16ccSHideki Saito     // Check whether the BB terminator is a BranchInst. Any other terminator is
434f2ec16ccSHideki Saito     // not supported yet.
435f2ec16ccSHideki Saito     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
436f2ec16ccSHideki Saito     if (!Br) {
4379e97caf5SRenato Golin       reportVectorizationFailure("Unsupported basic block terminator",
4389e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
439ec818d7fSHideki Saito           "CFGNotUnderstood", ORE, TheLoop);
440f2ec16ccSHideki Saito       if (DoExtraAnalysis)
441f2ec16ccSHideki Saito         Result = false;
442f2ec16ccSHideki Saito       else
443f2ec16ccSHideki Saito         return false;
444f2ec16ccSHideki Saito     }
445f2ec16ccSHideki Saito 
446f2ec16ccSHideki Saito     // Check whether the BranchInst is a supported one. Only unconditional
447f2ec16ccSHideki Saito     // branches, conditional branches with an outer loop invariant condition or
448f2ec16ccSHideki Saito     // backedges are supported.
4494e4ecae0SHideki Saito     // FIXME: We skip these checks when VPlan predication is enabled as we
4504e4ecae0SHideki Saito     // want to allow divergent branches. This whole check will be removed
4514e4ecae0SHideki Saito     // once VPlan predication is on by default.
4524e4ecae0SHideki Saito     if (!EnableVPlanPredication && Br && Br->isConditional() &&
453f2ec16ccSHideki Saito         !TheLoop->isLoopInvariant(Br->getCondition()) &&
454f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(0)) &&
455f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(1))) {
4569e97caf5SRenato Golin       reportVectorizationFailure("Unsupported conditional branch",
4579e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
458ec818d7fSHideki Saito           "CFGNotUnderstood", ORE, TheLoop);
459f2ec16ccSHideki Saito       if (DoExtraAnalysis)
460f2ec16ccSHideki Saito         Result = false;
461f2ec16ccSHideki Saito       else
462f2ec16ccSHideki Saito         return false;
463f2ec16ccSHideki Saito     }
464f2ec16ccSHideki Saito   }
465f2ec16ccSHideki Saito 
466f2ec16ccSHideki Saito   // Check whether inner loops are uniform. At this point, we only support
467f2ec16ccSHideki Saito   // simple outer loops scenarios with uniform nested loops.
468f2ec16ccSHideki Saito   if (!isUniformLoopNest(TheLoop /*loop nest*/,
469f2ec16ccSHideki Saito                          TheLoop /*context outer loop*/)) {
4709e97caf5SRenato Golin     reportVectorizationFailure("Outer loop contains divergent loops",
4719e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
472ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
473f2ec16ccSHideki Saito     if (DoExtraAnalysis)
474f2ec16ccSHideki Saito       Result = false;
475f2ec16ccSHideki Saito     else
476f2ec16ccSHideki Saito       return false;
477f2ec16ccSHideki Saito   }
478f2ec16ccSHideki Saito 
479ea7f3035SHideki Saito   // Check whether we are able to set up outer loop induction.
480ea7f3035SHideki Saito   if (!setupOuterLoopInductions()) {
4819e97caf5SRenato Golin     reportVectorizationFailure("Unsupported outer loop Phi(s)",
4829e97caf5SRenato Golin                                "Unsupported outer loop Phi(s)",
483ec818d7fSHideki Saito                                "UnsupportedPhi", ORE, TheLoop);
484ea7f3035SHideki Saito     if (DoExtraAnalysis)
485ea7f3035SHideki Saito       Result = false;
486ea7f3035SHideki Saito     else
487ea7f3035SHideki Saito       return false;
488ea7f3035SHideki Saito   }
489ea7f3035SHideki Saito 
490f2ec16ccSHideki Saito   return Result;
491f2ec16ccSHideki Saito }
492f2ec16ccSHideki Saito 
493f2ec16ccSHideki Saito void LoopVectorizationLegality::addInductionPhi(
494f2ec16ccSHideki Saito     PHINode *Phi, const InductionDescriptor &ID,
495f2ec16ccSHideki Saito     SmallPtrSetImpl<Value *> &AllowedExit) {
496f2ec16ccSHideki Saito   Inductions[Phi] = ID;
497f2ec16ccSHideki Saito 
498f2ec16ccSHideki Saito   // In case this induction also comes with casts that we know we can ignore
499f2ec16ccSHideki Saito   // in the vectorized loop body, record them here. All casts could be recorded
500f2ec16ccSHideki Saito   // here for ignoring, but suffices to record only the first (as it is the
501f2ec16ccSHideki Saito   // only one that may bw used outside the cast sequence).
502f2ec16ccSHideki Saito   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
503f2ec16ccSHideki Saito   if (!Casts.empty())
504f2ec16ccSHideki Saito     InductionCastsToIgnore.insert(*Casts.begin());
505f2ec16ccSHideki Saito 
506f2ec16ccSHideki Saito   Type *PhiTy = Phi->getType();
507f2ec16ccSHideki Saito   const DataLayout &DL = Phi->getModule()->getDataLayout();
508f2ec16ccSHideki Saito 
509f2ec16ccSHideki Saito   // Get the widest type.
510f2ec16ccSHideki Saito   if (!PhiTy->isFloatingPointTy()) {
511f2ec16ccSHideki Saito     if (!WidestIndTy)
512f2ec16ccSHideki Saito       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
513f2ec16ccSHideki Saito     else
514f2ec16ccSHideki Saito       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
515f2ec16ccSHideki Saito   }
516f2ec16ccSHideki Saito 
517f2ec16ccSHideki Saito   // Int inductions are special because we only allow one IV.
518f2ec16ccSHideki Saito   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
519f2ec16ccSHideki Saito       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
520f2ec16ccSHideki Saito       isa<Constant>(ID.getStartValue()) &&
521f2ec16ccSHideki Saito       cast<Constant>(ID.getStartValue())->isNullValue()) {
522f2ec16ccSHideki Saito 
523f2ec16ccSHideki Saito     // Use the phi node with the widest type as induction. Use the last
524f2ec16ccSHideki Saito     // one if there are multiple (no good reason for doing this other
525f2ec16ccSHideki Saito     // than it is expedient). We've checked that it begins at zero and
526f2ec16ccSHideki Saito     // steps by one, so this is a canonical induction variable.
527f2ec16ccSHideki Saito     if (!PrimaryInduction || PhiTy == WidestIndTy)
528f2ec16ccSHideki Saito       PrimaryInduction = Phi;
529f2ec16ccSHideki Saito   }
530f2ec16ccSHideki Saito 
531f2ec16ccSHideki Saito   // Both the PHI node itself, and the "post-increment" value feeding
532f2ec16ccSHideki Saito   // back into the PHI node may have external users.
533f2ec16ccSHideki Saito   // We can allow those uses, except if the SCEVs we have for them rely
534f2ec16ccSHideki Saito   // on predicates that only hold within the loop, since allowing the exit
5356a1dd77fSAnna Thomas   // currently means re-using this SCEV outside the loop (see PR33706 for more
5366a1dd77fSAnna Thomas   // details).
537f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().isAlwaysTrue()) {
538f2ec16ccSHideki Saito     AllowedExit.insert(Phi);
539f2ec16ccSHideki Saito     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
540f2ec16ccSHideki Saito   }
541f2ec16ccSHideki Saito 
542d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
543f2ec16ccSHideki Saito }
544f2ec16ccSHideki Saito 
545ea7f3035SHideki Saito bool LoopVectorizationLegality::setupOuterLoopInductions() {
546ea7f3035SHideki Saito   BasicBlock *Header = TheLoop->getHeader();
547ea7f3035SHideki Saito 
548ea7f3035SHideki Saito   // Returns true if a given Phi is a supported induction.
549ea7f3035SHideki Saito   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
550ea7f3035SHideki Saito     InductionDescriptor ID;
551ea7f3035SHideki Saito     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
552ea7f3035SHideki Saito         ID.getKind() == InductionDescriptor::IK_IntInduction) {
553ea7f3035SHideki Saito       addInductionPhi(&Phi, ID, AllowedExit);
554ea7f3035SHideki Saito       return true;
555ea7f3035SHideki Saito     } else {
556ea7f3035SHideki Saito       // Bail out for any Phi in the outer loop header that is not a supported
557ea7f3035SHideki Saito       // induction.
558ea7f3035SHideki Saito       LLVM_DEBUG(
559ea7f3035SHideki Saito           dbgs()
560ea7f3035SHideki Saito           << "LV: Found unsupported PHI for outer loop vectorization.\n");
561ea7f3035SHideki Saito       return false;
562ea7f3035SHideki Saito     }
563ea7f3035SHideki Saito   };
564ea7f3035SHideki Saito 
565ea7f3035SHideki Saito   if (llvm::all_of(Header->phis(), isSupportedPhi))
566ea7f3035SHideki Saito     return true;
567ea7f3035SHideki Saito   else
568ea7f3035SHideki Saito     return false;
569ea7f3035SHideki Saito }
570ea7f3035SHideki Saito 
571f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeInstrs() {
572f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
573f2ec16ccSHideki Saito 
574f2ec16ccSHideki Saito   // Look for the attribute signaling the absence of NaNs.
575f2ec16ccSHideki Saito   Function &F = *Header->getParent();
576f2ec16ccSHideki Saito   HasFunNoNaNAttr =
577f2ec16ccSHideki Saito       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
578f2ec16ccSHideki Saito 
579f2ec16ccSHideki Saito   // For each block in the loop.
580f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
581f2ec16ccSHideki Saito     // Scan the instructions in the block and look for hazards.
582f2ec16ccSHideki Saito     for (Instruction &I : *BB) {
583f2ec16ccSHideki Saito       if (auto *Phi = dyn_cast<PHINode>(&I)) {
584f2ec16ccSHideki Saito         Type *PhiTy = Phi->getType();
585f2ec16ccSHideki Saito         // Check that this PHI type is allowed.
586f2ec16ccSHideki Saito         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
587f2ec16ccSHideki Saito             !PhiTy->isPointerTy()) {
5889e97caf5SRenato Golin           reportVectorizationFailure("Found a non-int non-pointer PHI",
5899e97caf5SRenato Golin                                      "loop control flow is not understood by vectorizer",
590ec818d7fSHideki Saito                                      "CFGNotUnderstood", ORE, TheLoop);
591f2ec16ccSHideki Saito           return false;
592f2ec16ccSHideki Saito         }
593f2ec16ccSHideki Saito 
594f2ec16ccSHideki Saito         // If this PHINode is not in the header block, then we know that we
595f2ec16ccSHideki Saito         // can convert it to select during if-conversion. No need to check if
596f2ec16ccSHideki Saito         // the PHIs in this block are induction or reduction variables.
597f2ec16ccSHideki Saito         if (BB != Header) {
59860a1e4ddSAnna Thomas           // Non-header phi nodes that have outside uses can be vectorized. Add
59960a1e4ddSAnna Thomas           // them to the list of allowed exits.
60060a1e4ddSAnna Thomas           // Unsafe cyclic dependencies with header phis are identified during
60160a1e4ddSAnna Thomas           // legalization for reduction, induction and first order
60260a1e4ddSAnna Thomas           // recurrences.
603dd18ce45SBjorn Pettersson           AllowedExit.insert(&I);
604f2ec16ccSHideki Saito           continue;
605f2ec16ccSHideki Saito         }
606f2ec16ccSHideki Saito 
607f2ec16ccSHideki Saito         // We only allow if-converted PHIs with exactly two incoming values.
608f2ec16ccSHideki Saito         if (Phi->getNumIncomingValues() != 2) {
6099e97caf5SRenato Golin           reportVectorizationFailure("Found an invalid PHI",
6109e97caf5SRenato Golin               "loop control flow is not understood by vectorizer",
611ec818d7fSHideki Saito               "CFGNotUnderstood", ORE, TheLoop, Phi);
612f2ec16ccSHideki Saito           return false;
613f2ec16ccSHideki Saito         }
614f2ec16ccSHideki Saito 
615f2ec16ccSHideki Saito         RecurrenceDescriptor RedDes;
616f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
617f2ec16ccSHideki Saito                                                  DT)) {
618f2ec16ccSHideki Saito           if (RedDes.hasUnsafeAlgebra())
619f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
620f2ec16ccSHideki Saito           AllowedExit.insert(RedDes.getLoopExitInstr());
621f2ec16ccSHideki Saito           Reductions[Phi] = RedDes;
622f2ec16ccSHideki Saito           continue;
623f2ec16ccSHideki Saito         }
624f2ec16ccSHideki Saito 
625b02b0ad8SAnna Thomas         // TODO: Instead of recording the AllowedExit, it would be good to record the
626b02b0ad8SAnna Thomas         // complementary set: NotAllowedExit. These include (but may not be
627b02b0ad8SAnna Thomas         // limited to):
628b02b0ad8SAnna Thomas         // 1. Reduction phis as they represent the one-before-last value, which
629b02b0ad8SAnna Thomas         // is not available when vectorized
630b02b0ad8SAnna Thomas         // 2. Induction phis and increment when SCEV predicates cannot be used
631b02b0ad8SAnna Thomas         // outside the loop - see addInductionPhi
632b02b0ad8SAnna Thomas         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
633b02b0ad8SAnna Thomas         // outside the loop - see call to hasOutsideLoopUser in the non-phi
634b02b0ad8SAnna Thomas         // handling below
635b02b0ad8SAnna Thomas         // 4. FirstOrderRecurrence phis that can possibly be handled by
636b02b0ad8SAnna Thomas         // extraction.
637b02b0ad8SAnna Thomas         // By recording these, we can then reason about ways to vectorize each
638b02b0ad8SAnna Thomas         // of these NotAllowedExit.
639f2ec16ccSHideki Saito         InductionDescriptor ID;
640f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
641f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
642f2ec16ccSHideki Saito           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
643f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
644f2ec16ccSHideki Saito           continue;
645f2ec16ccSHideki Saito         }
646f2ec16ccSHideki Saito 
647f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
648f2ec16ccSHideki Saito                                                          SinkAfter, DT)) {
649f2ec16ccSHideki Saito           FirstOrderRecurrences.insert(Phi);
650f2ec16ccSHideki Saito           continue;
651f2ec16ccSHideki Saito         }
652f2ec16ccSHideki Saito 
653f2ec16ccSHideki Saito         // As a last resort, coerce the PHI to a AddRec expression
654f2ec16ccSHideki Saito         // and re-try classifying it a an induction PHI.
655f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
656f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
657f2ec16ccSHideki Saito           continue;
658f2ec16ccSHideki Saito         }
659f2ec16ccSHideki Saito 
6609e97caf5SRenato Golin         reportVectorizationFailure("Found an unidentified PHI",
6619e97caf5SRenato Golin             "value that could not be identified as "
6629e97caf5SRenato Golin             "reduction is used outside the loop",
663ec818d7fSHideki Saito             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
664f2ec16ccSHideki Saito         return false;
665f2ec16ccSHideki Saito       } // end of PHI handling
666f2ec16ccSHideki Saito 
667f2ec16ccSHideki Saito       // We handle calls that:
668f2ec16ccSHideki Saito       //   * Are debug info intrinsics.
669f2ec16ccSHideki Saito       //   * Have a mapping to an IR intrinsic.
670f2ec16ccSHideki Saito       //   * Have a vector version available.
671f2ec16ccSHideki Saito       auto *CI = dyn_cast<CallInst>(&I);
672f2ec16ccSHideki Saito       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
673f2ec16ccSHideki Saito           !isa<DbgInfoIntrinsic>(CI) &&
674f2ec16ccSHideki Saito           !(CI->getCalledFunction() && TLI &&
67519f73f0dSFrancesco Petrogalli             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
6767d65fe5cSSanjay Patel         // If the call is a recognized math libary call, it is likely that
6777d65fe5cSSanjay Patel         // we can vectorize it given loosened floating-point constraints.
6787d65fe5cSSanjay Patel         LibFunc Func;
6797d65fe5cSSanjay Patel         bool IsMathLibCall =
6807d65fe5cSSanjay Patel             TLI && CI->getCalledFunction() &&
6817d65fe5cSSanjay Patel             CI->getType()->isFloatingPointTy() &&
6827d65fe5cSSanjay Patel             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
6837d65fe5cSSanjay Patel             TLI->hasOptimizedCodeGen(Func);
6847d65fe5cSSanjay Patel 
6857d65fe5cSSanjay Patel         if (IsMathLibCall) {
6867d65fe5cSSanjay Patel           // TODO: Ideally, we should not use clang-specific language here,
6877d65fe5cSSanjay Patel           // but it's hard to provide meaningful yet generic advice.
6887d65fe5cSSanjay Patel           // Also, should this be guarded by allowExtraAnalysis() and/or be part
6897d65fe5cSSanjay Patel           // of the returned info from isFunctionVectorizable()?
69019f73f0dSFrancesco Petrogalli           reportVectorizationFailure("Found a non-intrinsic callsite",
6919e97caf5SRenato Golin               "library call cannot be vectorized. "
6927d65fe5cSSanjay Patel               "Try compiling with -fno-math-errno, -ffast-math, "
6939e97caf5SRenato Golin               "or similar flags",
694ec818d7fSHideki Saito               "CantVectorizeLibcall", ORE, TheLoop, CI);
6957d65fe5cSSanjay Patel         } else {
6969e97caf5SRenato Golin           reportVectorizationFailure("Found a non-intrinsic callsite",
6979e97caf5SRenato Golin                                      "call instruction cannot be vectorized",
698ec818d7fSHideki Saito                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
6997d65fe5cSSanjay Patel         }
700f2ec16ccSHideki Saito         return false;
701f2ec16ccSHideki Saito       }
702f2ec16ccSHideki Saito 
703a066f1f9SSimon Pilgrim       // Some intrinsics have scalar arguments and should be same in order for
704a066f1f9SSimon Pilgrim       // them to be vectorized (i.e. loop invariant).
705a066f1f9SSimon Pilgrim       if (CI) {
706f2ec16ccSHideki Saito         auto *SE = PSE.getSE();
707a066f1f9SSimon Pilgrim         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
708a066f1f9SSimon Pilgrim         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
709a066f1f9SSimon Pilgrim           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
710a066f1f9SSimon Pilgrim             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
7119e97caf5SRenato Golin               reportVectorizationFailure("Found unvectorizable intrinsic",
7129e97caf5SRenato Golin                   "intrinsic instruction cannot be vectorized",
713ec818d7fSHideki Saito                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
714f2ec16ccSHideki Saito               return false;
715f2ec16ccSHideki Saito             }
716f2ec16ccSHideki Saito           }
717a066f1f9SSimon Pilgrim       }
718f2ec16ccSHideki Saito 
719f2ec16ccSHideki Saito       // Check that the instruction return type is vectorizable.
720f2ec16ccSHideki Saito       // Also, we can't vectorize extractelement instructions.
721f2ec16ccSHideki Saito       if ((!VectorType::isValidElementType(I.getType()) &&
722f2ec16ccSHideki Saito            !I.getType()->isVoidTy()) ||
723f2ec16ccSHideki Saito           isa<ExtractElementInst>(I)) {
7249e97caf5SRenato Golin         reportVectorizationFailure("Found unvectorizable type",
7259e97caf5SRenato Golin             "instruction return type cannot be vectorized",
726ec818d7fSHideki Saito             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
727f2ec16ccSHideki Saito         return false;
728f2ec16ccSHideki Saito       }
729f2ec16ccSHideki Saito 
730f2ec16ccSHideki Saito       // Check that the stored type is vectorizable.
731f2ec16ccSHideki Saito       if (auto *ST = dyn_cast<StoreInst>(&I)) {
732f2ec16ccSHideki Saito         Type *T = ST->getValueOperand()->getType();
733f2ec16ccSHideki Saito         if (!VectorType::isValidElementType(T)) {
7349e97caf5SRenato Golin           reportVectorizationFailure("Store instruction cannot be vectorized",
7359e97caf5SRenato Golin                                      "store instruction cannot be vectorized",
736ec818d7fSHideki Saito                                      "CantVectorizeStore", ORE, TheLoop, ST);
737f2ec16ccSHideki Saito           return false;
738f2ec16ccSHideki Saito         }
739f2ec16ccSHideki Saito 
7406452bdd2SWarren Ristow         // For nontemporal stores, check that a nontemporal vector version is
7416452bdd2SWarren Ristow         // supported on the target.
7426452bdd2SWarren Ristow         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
7436452bdd2SWarren Ristow           // Arbitrarily try a vector of 2 elements.
7446452bdd2SWarren Ristow           Type *VecTy = VectorType::get(T, /*NumElements=*/2);
7456452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of stored type");
7465e1e83eeSGuillaume Chatelet           const MaybeAlign Alignment = getLoadStoreAlignment(ST);
74733671cefSGuillaume Chatelet           assert(Alignment && "Alignment should be set");
7485e1e83eeSGuillaume Chatelet           if (!TTI->isLegalNTStore(VecTy, *Alignment)) {
7496452bdd2SWarren Ristow             reportVectorizationFailure(
7506452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
7516452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
752ec818d7fSHideki Saito                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
7536452bdd2SWarren Ristow             return false;
7546452bdd2SWarren Ristow           }
7556452bdd2SWarren Ristow         }
7566452bdd2SWarren Ristow 
7576452bdd2SWarren Ristow       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
7586452bdd2SWarren Ristow         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
7596452bdd2SWarren Ristow           // For nontemporal loads, check that a nontemporal vector version is
7606452bdd2SWarren Ristow           // supported on the target (arbitrarily try a vector of 2 elements).
7616452bdd2SWarren Ristow           Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
7626452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of load type");
7635e1e83eeSGuillaume Chatelet           const MaybeAlign Alignment = getLoadStoreAlignment(LD);
76433671cefSGuillaume Chatelet           assert(Alignment && "Alignment should be set");
7655e1e83eeSGuillaume Chatelet           if (!TTI->isLegalNTLoad(VecTy, *Alignment)) {
7666452bdd2SWarren Ristow             reportVectorizationFailure(
7676452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
7686452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
769ec818d7fSHideki Saito                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
7706452bdd2SWarren Ristow             return false;
7716452bdd2SWarren Ristow           }
7726452bdd2SWarren Ristow         }
7736452bdd2SWarren Ristow 
774f2ec16ccSHideki Saito         // FP instructions can allow unsafe algebra, thus vectorizable by
775f2ec16ccSHideki Saito         // non-IEEE-754 compliant SIMD units.
776f2ec16ccSHideki Saito         // This applies to floating-point math operations and calls, not memory
777f2ec16ccSHideki Saito         // operations, shuffles, or casts, as they don't change precision or
778f2ec16ccSHideki Saito         // semantics.
779f2ec16ccSHideki Saito       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
780f2ec16ccSHideki Saito                  !I.isFast()) {
781d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
782f2ec16ccSHideki Saito         Hints->setPotentiallyUnsafe();
783f2ec16ccSHideki Saito       }
784f2ec16ccSHideki Saito 
785f2ec16ccSHideki Saito       // Reduction instructions are allowed to have exit users.
786f2ec16ccSHideki Saito       // All other instructions must not have external users.
787f2ec16ccSHideki Saito       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
788b02b0ad8SAnna Thomas         // We can safely vectorize loops where instructions within the loop are
789b02b0ad8SAnna Thomas         // used outside the loop only if the SCEV predicates within the loop is
790b02b0ad8SAnna Thomas         // same as outside the loop. Allowing the exit means reusing the SCEV
791b02b0ad8SAnna Thomas         // outside the loop.
792b02b0ad8SAnna Thomas         if (PSE.getUnionPredicate().isAlwaysTrue()) {
793b02b0ad8SAnna Thomas           AllowedExit.insert(&I);
794b02b0ad8SAnna Thomas           continue;
795b02b0ad8SAnna Thomas         }
7969e97caf5SRenato Golin         reportVectorizationFailure("Value cannot be used outside the loop",
7979e97caf5SRenato Golin                                    "value cannot be used outside the loop",
798ec818d7fSHideki Saito                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
799f2ec16ccSHideki Saito         return false;
800f2ec16ccSHideki Saito       }
801f2ec16ccSHideki Saito     } // next instr.
802f2ec16ccSHideki Saito   }
803f2ec16ccSHideki Saito 
804f2ec16ccSHideki Saito   if (!PrimaryInduction) {
805f2ec16ccSHideki Saito     if (Inductions.empty()) {
8069e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8079e97caf5SRenato Golin           "loop induction variable could not be identified",
808ec818d7fSHideki Saito           "NoInductionVariable", ORE, TheLoop);
809f2ec16ccSHideki Saito       return false;
8104f27730eSWarren Ristow     } else if (!WidestIndTy) {
8119e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8129e97caf5SRenato Golin           "integer loop induction variable could not be identified",
813ec818d7fSHideki Saito           "NoIntegerInductionVariable", ORE, TheLoop);
8144f27730eSWarren Ristow       return false;
8159e97caf5SRenato Golin     } else {
8169e97caf5SRenato Golin       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
817f2ec16ccSHideki Saito     }
818f2ec16ccSHideki Saito   }
819f2ec16ccSHideki Saito 
8209d24933fSFlorian Hahn   // For first order recurrences, we use the previous value (incoming value from
8219d24933fSFlorian Hahn   // the latch) to check if it dominates all users of the recurrence. Bail out
8229d24933fSFlorian Hahn   // if we have to sink such an instruction for another recurrence, as the
8239d24933fSFlorian Hahn   // dominance requirement may not hold after sinking.
8249d24933fSFlorian Hahn   BasicBlock *LoopLatch = TheLoop->getLoopLatch();
8259d24933fSFlorian Hahn   if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
8269d24933fSFlorian Hahn         Instruction *V =
8279d24933fSFlorian Hahn             cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
8289d24933fSFlorian Hahn         return SinkAfter.find(V) != SinkAfter.end();
8299d24933fSFlorian Hahn       }))
8309d24933fSFlorian Hahn     return false;
8319d24933fSFlorian Hahn 
832f2ec16ccSHideki Saito   // Now we know the widest induction type, check if our found induction
833f2ec16ccSHideki Saito   // is the same size. If it's not, unset it here and InnerLoopVectorizer
834f2ec16ccSHideki Saito   // will create another.
835f2ec16ccSHideki Saito   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
836f2ec16ccSHideki Saito     PrimaryInduction = nullptr;
837f2ec16ccSHideki Saito 
838f2ec16ccSHideki Saito   return true;
839f2ec16ccSHideki Saito }
840f2ec16ccSHideki Saito 
841f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeMemory() {
842f2ec16ccSHideki Saito   LAI = &(*GetLAA)(*TheLoop);
843f2ec16ccSHideki Saito   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
844f2ec16ccSHideki Saito   if (LAR) {
845f2ec16ccSHideki Saito     ORE->emit([&]() {
846f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
847f2ec16ccSHideki Saito                                         "loop not vectorized: ", *LAR);
848f2ec16ccSHideki Saito     });
849f2ec16ccSHideki Saito   }
850f2ec16ccSHideki Saito   if (!LAI->canVectorizeMemory())
851f2ec16ccSHideki Saito     return false;
852f2ec16ccSHideki Saito 
8535e9215f0SAnna Thomas   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
8549e97caf5SRenato Golin     reportVectorizationFailure("Stores to a uniform address",
8559e97caf5SRenato Golin         "write to a loop invariant address could not be vectorized",
856ec818d7fSHideki Saito         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
857f2ec16ccSHideki Saito     return false;
858f2ec16ccSHideki Saito   }
859f2ec16ccSHideki Saito   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
860f2ec16ccSHideki Saito   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
861f2ec16ccSHideki Saito 
862f2ec16ccSHideki Saito   return true;
863f2ec16ccSHideki Saito }
864f2ec16ccSHideki Saito 
865f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
866f2ec16ccSHideki Saito   Value *In0 = const_cast<Value *>(V);
867f2ec16ccSHideki Saito   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
868f2ec16ccSHideki Saito   if (!PN)
869f2ec16ccSHideki Saito     return false;
870f2ec16ccSHideki Saito 
871f2ec16ccSHideki Saito   return Inductions.count(PN);
872f2ec16ccSHideki Saito }
873f2ec16ccSHideki Saito 
874f2ec16ccSHideki Saito bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
875f2ec16ccSHideki Saito   auto *Inst = dyn_cast<Instruction>(V);
876f2ec16ccSHideki Saito   return (Inst && InductionCastsToIgnore.count(Inst));
877f2ec16ccSHideki Saito }
878f2ec16ccSHideki Saito 
879f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
880f2ec16ccSHideki Saito   return isInductionPhi(V) || isCastedInductionVariable(V);
881f2ec16ccSHideki Saito }
882f2ec16ccSHideki Saito 
883f2ec16ccSHideki Saito bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
884f2ec16ccSHideki Saito   return FirstOrderRecurrences.count(Phi);
885f2ec16ccSHideki Saito }
886f2ec16ccSHideki Saito 
887f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
888f2ec16ccSHideki Saito   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
889f2ec16ccSHideki Saito }
890f2ec16ccSHideki Saito 
891f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockCanBePredicated(
892d57d73daSDorit Nuzman     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
893f2ec16ccSHideki Saito   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
894f2ec16ccSHideki Saito 
895f2ec16ccSHideki Saito   for (Instruction &I : *BB) {
896f2ec16ccSHideki Saito     // Check that we don't have a constant expression that can trap as operand.
897f2ec16ccSHideki Saito     for (Value *Operand : I.operands()) {
898f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(Operand))
899f2ec16ccSHideki Saito         if (C->canTrap())
900f2ec16ccSHideki Saito           return false;
901f2ec16ccSHideki Saito     }
902*23c11380SFlorian Hahn 
903*23c11380SFlorian Hahn     // We can predicate blocks with calls to assume, as long as we drop them in
904*23c11380SFlorian Hahn     // case we flatten the CFG via predication.
905*23c11380SFlorian Hahn     if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
906*23c11380SFlorian Hahn       ConditionalAssumes.insert(&I);
907*23c11380SFlorian Hahn       continue;
908*23c11380SFlorian Hahn     }
909*23c11380SFlorian Hahn 
910f2ec16ccSHideki Saito     // We might be able to hoist the load.
911f2ec16ccSHideki Saito     if (I.mayReadFromMemory()) {
912f2ec16ccSHideki Saito       auto *LI = dyn_cast<LoadInst>(&I);
913f2ec16ccSHideki Saito       if (!LI)
914f2ec16ccSHideki Saito         return false;
915f2ec16ccSHideki Saito       if (!SafePtrs.count(LI->getPointerOperand())) {
916f2ec16ccSHideki Saito         // !llvm.mem.parallel_loop_access implies if-conversion safety.
917f2ec16ccSHideki Saito         // Otherwise, record that the load needs (real or emulated) masking
918f2ec16ccSHideki Saito         // and let the cost model decide.
919d57d73daSDorit Nuzman         if (!IsAnnotatedParallel || PreserveGuards)
920f2ec16ccSHideki Saito           MaskedOp.insert(LI);
921f2ec16ccSHideki Saito         continue;
922f2ec16ccSHideki Saito       }
923f2ec16ccSHideki Saito     }
924f2ec16ccSHideki Saito 
925f2ec16ccSHideki Saito     if (I.mayWriteToMemory()) {
926f2ec16ccSHideki Saito       auto *SI = dyn_cast<StoreInst>(&I);
927f2ec16ccSHideki Saito       if (!SI)
928f2ec16ccSHideki Saito         return false;
929f2ec16ccSHideki Saito       // Predicated store requires some form of masking:
930f2ec16ccSHideki Saito       // 1) masked store HW instruction,
931f2ec16ccSHideki Saito       // 2) emulation via load-blend-store (only if safe and legal to do so,
932f2ec16ccSHideki Saito       //    be aware on the race conditions), or
933f2ec16ccSHideki Saito       // 3) element-by-element predicate check and scalar store.
934f2ec16ccSHideki Saito       MaskedOp.insert(SI);
935f2ec16ccSHideki Saito       continue;
936f2ec16ccSHideki Saito     }
937f2ec16ccSHideki Saito     if (I.mayThrow())
938f2ec16ccSHideki Saito       return false;
939f2ec16ccSHideki Saito   }
940f2ec16ccSHideki Saito 
941f2ec16ccSHideki Saito   return true;
942f2ec16ccSHideki Saito }
943f2ec16ccSHideki Saito 
944f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
945f2ec16ccSHideki Saito   if (!EnableIfConversion) {
9469e97caf5SRenato Golin     reportVectorizationFailure("If-conversion is disabled",
9479e97caf5SRenato Golin                                "if-conversion is disabled",
948ec818d7fSHideki Saito                                "IfConversionDisabled",
949ec818d7fSHideki Saito                                ORE, TheLoop);
950f2ec16ccSHideki Saito     return false;
951f2ec16ccSHideki Saito   }
952f2ec16ccSHideki Saito 
953f2ec16ccSHideki Saito   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
954f2ec16ccSHideki Saito 
955cf3b5559SPhilip Reames   // A list of pointers which are known to be dereferenceable within scope of
956cf3b5559SPhilip Reames   // the loop body for each iteration of the loop which executes.  That is,
957cf3b5559SPhilip Reames   // the memory pointed to can be dereferenced (with the access size implied by
958cf3b5559SPhilip Reames   // the value's type) unconditionally within the loop header without
959cf3b5559SPhilip Reames   // introducing a new fault.
960f2ec16ccSHideki Saito   SmallPtrSet<Value *, 8> SafePointes;
961f2ec16ccSHideki Saito 
962f2ec16ccSHideki Saito   // Collect safe addresses.
963f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
9647403569bSPhilip Reames     if (!blockNeedsPredication(BB)) {
965f2ec16ccSHideki Saito       for (Instruction &I : *BB)
966f2ec16ccSHideki Saito         if (auto *Ptr = getLoadStorePointerOperand(&I))
967f2ec16ccSHideki Saito           SafePointes.insert(Ptr);
9687403569bSPhilip Reames       continue;
9697403569bSPhilip Reames     }
9707403569bSPhilip Reames 
9717403569bSPhilip Reames     // For a block which requires predication, a address may be safe to access
9727403569bSPhilip Reames     // in the loop w/o predication if we can prove dereferenceability facts
9737403569bSPhilip Reames     // sufficient to ensure it'll never fault within the loop. For the moment,
9747403569bSPhilip Reames     // we restrict this to loads; stores are more complicated due to
9757403569bSPhilip Reames     // concurrency restrictions.
9767403569bSPhilip Reames     ScalarEvolution &SE = *PSE.getSE();
9777403569bSPhilip Reames     for (Instruction &I : *BB) {
9787403569bSPhilip Reames       LoadInst *LI = dyn_cast<LoadInst>(&I);
9797403569bSPhilip Reames       if (LI && !mustSuppressSpeculation(*LI) &&
9807403569bSPhilip Reames           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
9817403569bSPhilip Reames         SafePointes.insert(LI->getPointerOperand());
9827403569bSPhilip Reames     }
983f2ec16ccSHideki Saito   }
984f2ec16ccSHideki Saito 
985f2ec16ccSHideki Saito   // Collect the blocks that need predication.
986f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
987f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
988f2ec16ccSHideki Saito     // We don't support switch statements inside loops.
989f2ec16ccSHideki Saito     if (!isa<BranchInst>(BB->getTerminator())) {
9909e97caf5SRenato Golin       reportVectorizationFailure("Loop contains a switch statement",
9919e97caf5SRenato Golin                                  "loop contains a switch statement",
992ec818d7fSHideki Saito                                  "LoopContainsSwitch", ORE, TheLoop,
993ec818d7fSHideki Saito                                  BB->getTerminator());
994f2ec16ccSHideki Saito       return false;
995f2ec16ccSHideki Saito     }
996f2ec16ccSHideki Saito 
997f2ec16ccSHideki Saito     // We must be able to predicate all blocks that need to be predicated.
998f2ec16ccSHideki Saito     if (blockNeedsPredication(BB)) {
999f2ec16ccSHideki Saito       if (!blockCanBePredicated(BB, SafePointes)) {
10009e97caf5SRenato Golin         reportVectorizationFailure(
10019e97caf5SRenato Golin             "Control flow cannot be substituted for a select",
10029e97caf5SRenato Golin             "control flow cannot be substituted for a select",
1003ec818d7fSHideki Saito             "NoCFGForSelect", ORE, TheLoop,
1004ec818d7fSHideki Saito             BB->getTerminator());
1005f2ec16ccSHideki Saito         return false;
1006f2ec16ccSHideki Saito       }
1007f2ec16ccSHideki Saito     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
10089e97caf5SRenato Golin       reportVectorizationFailure(
10099e97caf5SRenato Golin           "Control flow cannot be substituted for a select",
10109e97caf5SRenato Golin           "control flow cannot be substituted for a select",
1011ec818d7fSHideki Saito           "NoCFGForSelect", ORE, TheLoop,
1012ec818d7fSHideki Saito           BB->getTerminator());
1013f2ec16ccSHideki Saito       return false;
1014f2ec16ccSHideki Saito     }
1015f2ec16ccSHideki Saito   }
1016f2ec16ccSHideki Saito 
1017f2ec16ccSHideki Saito   // We can if-convert this loop.
1018f2ec16ccSHideki Saito   return true;
1019f2ec16ccSHideki Saito }
1020f2ec16ccSHideki Saito 
1021f2ec16ccSHideki Saito // Helper function to canVectorizeLoopNestCFG.
1022f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1023f2ec16ccSHideki Saito                                                     bool UseVPlanNativePath) {
1024f2ec16ccSHideki Saito   assert((UseVPlanNativePath || Lp->empty()) &&
1025f2ec16ccSHideki Saito          "VPlan-native path is not enabled.");
1026f2ec16ccSHideki Saito 
1027f2ec16ccSHideki Saito   // TODO: ORE should be improved to show more accurate information when an
1028f2ec16ccSHideki Saito   // outer loop can't be vectorized because a nested loop is not understood or
1029f2ec16ccSHideki Saito   // legal. Something like: "outer_loop_location: loop not vectorized:
1030f2ec16ccSHideki Saito   // (inner_loop_location) loop control flow is not understood by vectorizer".
1031f2ec16ccSHideki Saito 
1032f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1033f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1034f2ec16ccSHideki Saito   bool Result = true;
1035f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1036f2ec16ccSHideki Saito 
1037f2ec16ccSHideki Saito   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1038f2ec16ccSHideki Saito   // be canonicalized.
1039f2ec16ccSHideki Saito   if (!Lp->getLoopPreheader()) {
10409e97caf5SRenato Golin     reportVectorizationFailure("Loop doesn't have a legal pre-header",
10419e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1042ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1043f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1044f2ec16ccSHideki Saito       Result = false;
1045f2ec16ccSHideki Saito     else
1046f2ec16ccSHideki Saito       return false;
1047f2ec16ccSHideki Saito   }
1048f2ec16ccSHideki Saito 
1049f2ec16ccSHideki Saito   // We must have a single backedge.
1050f2ec16ccSHideki Saito   if (Lp->getNumBackEdges() != 1) {
10519e97caf5SRenato Golin     reportVectorizationFailure("The loop must have a single backedge",
10529e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1053ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1054f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1055f2ec16ccSHideki Saito       Result = false;
1056f2ec16ccSHideki Saito     else
1057f2ec16ccSHideki Saito       return false;
1058f2ec16ccSHideki Saito   }
1059f2ec16ccSHideki Saito 
1060f2ec16ccSHideki Saito   // We must have a single exiting block.
1061f2ec16ccSHideki Saito   if (!Lp->getExitingBlock()) {
10629e97caf5SRenato Golin     reportVectorizationFailure("The loop must have an exiting block",
10639e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1064ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1065f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1066f2ec16ccSHideki Saito       Result = false;
1067f2ec16ccSHideki Saito     else
1068f2ec16ccSHideki Saito       return false;
1069f2ec16ccSHideki Saito   }
1070f2ec16ccSHideki Saito 
1071f2ec16ccSHideki Saito   // We only handle bottom-tested loops, i.e. loop in which the condition is
1072f2ec16ccSHideki Saito   // checked at the end of each iteration. With that we can assume that all
1073f2ec16ccSHideki Saito   // instructions in the loop are executed the same number of times.
1074f2ec16ccSHideki Saito   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
10759e97caf5SRenato Golin     reportVectorizationFailure("The exiting block is not the loop latch",
10769e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1077ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1078f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1079f2ec16ccSHideki Saito       Result = false;
1080f2ec16ccSHideki Saito     else
1081f2ec16ccSHideki Saito       return false;
1082f2ec16ccSHideki Saito   }
1083f2ec16ccSHideki Saito 
1084f2ec16ccSHideki Saito   return Result;
1085f2ec16ccSHideki Saito }
1086f2ec16ccSHideki Saito 
1087f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1088f2ec16ccSHideki Saito     Loop *Lp, bool UseVPlanNativePath) {
1089f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1090f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1091f2ec16ccSHideki Saito   bool Result = true;
1092f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1093f2ec16ccSHideki Saito   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1094f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1095f2ec16ccSHideki Saito       Result = false;
1096f2ec16ccSHideki Saito     else
1097f2ec16ccSHideki Saito       return false;
1098f2ec16ccSHideki Saito   }
1099f2ec16ccSHideki Saito 
1100f2ec16ccSHideki Saito   // Recursively check whether the loop control flow of nested loops is
1101f2ec16ccSHideki Saito   // understood.
1102f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
1103f2ec16ccSHideki Saito     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1104f2ec16ccSHideki Saito       if (DoExtraAnalysis)
1105f2ec16ccSHideki Saito         Result = false;
1106f2ec16ccSHideki Saito       else
1107f2ec16ccSHideki Saito         return false;
1108f2ec16ccSHideki Saito     }
1109f2ec16ccSHideki Saito 
1110f2ec16ccSHideki Saito   return Result;
1111f2ec16ccSHideki Saito }
1112f2ec16ccSHideki Saito 
1113f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1114f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1115f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1116f2ec16ccSHideki Saito   bool Result = true;
1117f2ec16ccSHideki Saito 
1118f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1119f2ec16ccSHideki Saito   // Check whether the loop-related control flow in the loop nest is expected by
1120f2ec16ccSHideki Saito   // vectorizer.
1121f2ec16ccSHideki Saito   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1122f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1123f2ec16ccSHideki Saito       Result = false;
1124f2ec16ccSHideki Saito     else
1125f2ec16ccSHideki Saito       return false;
1126f2ec16ccSHideki Saito   }
1127f2ec16ccSHideki Saito 
1128f2ec16ccSHideki Saito   // We need to have a loop header.
1129d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1130f2ec16ccSHideki Saito                     << '\n');
1131f2ec16ccSHideki Saito 
1132f2ec16ccSHideki Saito   // Specific checks for outer loops. We skip the remaining legal checks at this
1133f2ec16ccSHideki Saito   // point because they don't support outer loops.
1134f2ec16ccSHideki Saito   if (!TheLoop->empty()) {
1135f2ec16ccSHideki Saito     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1136f2ec16ccSHideki Saito 
1137f2ec16ccSHideki Saito     if (!canVectorizeOuterLoop()) {
11389e97caf5SRenato Golin       reportVectorizationFailure("Unsupported outer loop",
11399e97caf5SRenato Golin                                  "unsupported outer loop",
1140ec818d7fSHideki Saito                                  "UnsupportedOuterLoop",
1141ec818d7fSHideki Saito                                  ORE, TheLoop);
1142f2ec16ccSHideki Saito       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1143f2ec16ccSHideki Saito       // outer loops.
1144f2ec16ccSHideki Saito       return false;
1145f2ec16ccSHideki Saito     }
1146f2ec16ccSHideki Saito 
1147d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1148f2ec16ccSHideki Saito     return Result;
1149f2ec16ccSHideki Saito   }
1150f2ec16ccSHideki Saito 
1151f2ec16ccSHideki Saito   assert(TheLoop->empty() && "Inner loop expected.");
1152f2ec16ccSHideki Saito   // Check if we can if-convert non-single-bb loops.
1153f2ec16ccSHideki Saito   unsigned NumBlocks = TheLoop->getNumBlocks();
1154f2ec16ccSHideki Saito   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1155d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1156f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1157f2ec16ccSHideki Saito       Result = false;
1158f2ec16ccSHideki Saito     else
1159f2ec16ccSHideki Saito       return false;
1160f2ec16ccSHideki Saito   }
1161f2ec16ccSHideki Saito 
1162f2ec16ccSHideki Saito   // Check if we can vectorize the instructions and CFG in this loop.
1163f2ec16ccSHideki Saito   if (!canVectorizeInstrs()) {
1164d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1165f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1166f2ec16ccSHideki Saito       Result = false;
1167f2ec16ccSHideki Saito     else
1168f2ec16ccSHideki Saito       return false;
1169f2ec16ccSHideki Saito   }
1170f2ec16ccSHideki Saito 
1171f2ec16ccSHideki Saito   // Go over each instruction and look at memory deps.
1172f2ec16ccSHideki Saito   if (!canVectorizeMemory()) {
1173d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1174f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1175f2ec16ccSHideki Saito       Result = false;
1176f2ec16ccSHideki Saito     else
1177f2ec16ccSHideki Saito       return false;
1178f2ec16ccSHideki Saito   }
1179f2ec16ccSHideki Saito 
1180d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1181f2ec16ccSHideki Saito                     << (LAI->getRuntimePointerChecking()->Need
1182f2ec16ccSHideki Saito                             ? " (with a runtime bound check)"
1183f2ec16ccSHideki Saito                             : "")
1184f2ec16ccSHideki Saito                     << "!\n");
1185f2ec16ccSHideki Saito 
1186f2ec16ccSHideki Saito   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1187f2ec16ccSHideki Saito   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1188f2ec16ccSHideki Saito     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1189f2ec16ccSHideki Saito 
1190f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
11919e97caf5SRenato Golin     reportVectorizationFailure("Too many SCEV checks needed",
11929e97caf5SRenato Golin         "Too many SCEV assumptions need to be made and checked at runtime",
1193ec818d7fSHideki Saito         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1194f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1195f2ec16ccSHideki Saito       Result = false;
1196f2ec16ccSHideki Saito     else
1197f2ec16ccSHideki Saito       return false;
1198f2ec16ccSHideki Saito   }
1199f2ec16ccSHideki Saito 
1200f2ec16ccSHideki Saito   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1201f2ec16ccSHideki Saito   // we can vectorize, and at this point we don't have any other mem analysis
1202f2ec16ccSHideki Saito   // which may limit our maximum vectorization factor, so just return true with
1203f2ec16ccSHideki Saito   // no restrictions.
1204f2ec16ccSHideki Saito   return Result;
1205f2ec16ccSHideki Saito }
1206f2ec16ccSHideki Saito 
1207d57d73daSDorit Nuzman bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1208b0b5312eSAyal Zaks 
1209b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1210b0b5312eSAyal Zaks 
1211b0b5312eSAyal Zaks   if (!PrimaryInduction) {
12129e97caf5SRenato Golin     reportVectorizationFailure(
12139e97caf5SRenato Golin         "No primary induction, cannot fold tail by masking",
12149e97caf5SRenato Golin         "Missing a primary induction variable in the loop, which is "
12159e97caf5SRenato Golin         "needed in order to fold tail by masking as required.",
1216ec818d7fSHideki Saito         "NoPrimaryInduction", ORE, TheLoop);
1217b0b5312eSAyal Zaks     return false;
1218b0b5312eSAyal Zaks   }
1219b0b5312eSAyal Zaks 
1220d15df0edSAyal Zaks   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1221b0b5312eSAyal Zaks 
1222d15df0edSAyal Zaks   for (auto &Reduction : *getReductionVars())
1223d15df0edSAyal Zaks     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1224d15df0edSAyal Zaks 
1225d15df0edSAyal Zaks   // TODO: handle non-reduction outside users when tail is folded by masking.
1226b0b5312eSAyal Zaks   for (auto *AE : AllowedExit) {
1227d15df0edSAyal Zaks     // Check that all users of allowed exit values are inside the loop or
1228d15df0edSAyal Zaks     // are the live-out of a reduction.
1229d15df0edSAyal Zaks     if (ReductionLiveOuts.count(AE))
1230d15df0edSAyal Zaks       continue;
1231b0b5312eSAyal Zaks     for (User *U : AE->users()) {
1232b0b5312eSAyal Zaks       Instruction *UI = cast<Instruction>(U);
1233b0b5312eSAyal Zaks       if (TheLoop->contains(UI))
1234b0b5312eSAyal Zaks         continue;
12359e97caf5SRenato Golin       reportVectorizationFailure(
12369e97caf5SRenato Golin           "Cannot fold tail by masking, loop has an outside user for",
12379e97caf5SRenato Golin           "Cannot fold tail by masking in the presence of live outs.",
1238ec818d7fSHideki Saito           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1239b0b5312eSAyal Zaks       return false;
1240b0b5312eSAyal Zaks     }
1241b0b5312eSAyal Zaks   }
1242b0b5312eSAyal Zaks 
1243b0b5312eSAyal Zaks   // The list of pointers that we can safely read and write to remains empty.
1244b0b5312eSAyal Zaks   SmallPtrSet<Value *, 8> SafePointers;
1245b0b5312eSAyal Zaks 
1246b0b5312eSAyal Zaks   // Check and mark all blocks for predication, including those that ordinarily
1247b0b5312eSAyal Zaks   // do not need predication such as the header block.
1248b0b5312eSAyal Zaks   for (BasicBlock *BB : TheLoop->blocks()) {
1249d57d73daSDorit Nuzman     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
12509e97caf5SRenato Golin       reportVectorizationFailure(
12519e97caf5SRenato Golin           "Cannot fold tail by masking as required",
12529e97caf5SRenato Golin           "control flow cannot be substituted for a select",
1253ec818d7fSHideki Saito           "NoCFGForSelect", ORE, TheLoop,
1254ec818d7fSHideki Saito           BB->getTerminator());
1255b0b5312eSAyal Zaks       return false;
1256b0b5312eSAyal Zaks     }
1257b0b5312eSAyal Zaks   }
1258b0b5312eSAyal Zaks 
1259b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1260b0b5312eSAyal Zaks   return true;
1261b0b5312eSAyal Zaks }
1262b0b5312eSAyal Zaks 
1263f2ec16ccSHideki Saito } // namespace llvm
1264