1f2ec16ccSHideki Saito //===- LoopVectorizationLegality.cpp --------------------------------------===//
2f2ec16ccSHideki Saito //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6f2ec16ccSHideki Saito //
7f2ec16ccSHideki Saito //===----------------------------------------------------------------------===//
8f2ec16ccSHideki Saito //
9f2ec16ccSHideki Saito // This file provides loop vectorization legality analysis. Original code
10f2ec16ccSHideki Saito // resided in LoopVectorize.cpp for a long time.
11f2ec16ccSHideki Saito //
12f2ec16ccSHideki Saito // At this point, it is implemented as a utility class, not as an analysis
13f2ec16ccSHideki Saito // pass. It should be easy to create an analysis pass around it if there
14f2ec16ccSHideki Saito // is a need (but D45420 needs to happen first).
15f2ec16ccSHideki Saito //
16f2ec16ccSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
177403569bSPhilip Reames #include "llvm/Analysis/Loads.h"
18*a5f1f9c9SSimon Pilgrim #include "llvm/Analysis/LoopInfo.h"
197403569bSPhilip Reames #include "llvm/Analysis/ValueTracking.h"
20f2ec16ccSHideki Saito #include "llvm/Analysis/VectorUtils.h"
21f2ec16ccSHideki Saito #include "llvm/IR/IntrinsicInst.h"
2223c11380SFlorian Hahn #include "llvm/IR/PatternMatch.h"
2323c11380SFlorian Hahn #include "llvm/Transforms/Vectorize/LoopVectorize.h"
24f2ec16ccSHideki Saito 
25f2ec16ccSHideki Saito using namespace llvm;
2623c11380SFlorian Hahn using namespace PatternMatch;
27f2ec16ccSHideki Saito 
28f2ec16ccSHideki Saito #define LV_NAME "loop-vectorize"
29f2ec16ccSHideki Saito #define DEBUG_TYPE LV_NAME
30f2ec16ccSHideki Saito 
314e4ecae0SHideki Saito extern cl::opt<bool> EnableVPlanPredication;
324e4ecae0SHideki Saito 
33f2ec16ccSHideki Saito static cl::opt<bool>
34f2ec16ccSHideki Saito     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
35f2ec16ccSHideki Saito                        cl::desc("Enable if-conversion during vectorization."));
36f2ec16ccSHideki Saito 
37f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
38f2ec16ccSHideki Saito     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
39f2ec16ccSHideki Saito     cl::desc("The maximum allowed number of runtime memory checks with a "
40f2ec16ccSHideki Saito              "vectorize(enable) pragma."));
41f2ec16ccSHideki Saito 
42f2ec16ccSHideki Saito static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
43f2ec16ccSHideki Saito     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
44f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed."));
45f2ec16ccSHideki Saito 
46f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
47f2ec16ccSHideki Saito     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
48f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed with a "
49f2ec16ccSHideki Saito              "vectorize(enable) pragma"));
50f2ec16ccSHideki Saito 
51f2ec16ccSHideki Saito /// Maximum vectorization interleave count.
52f2ec16ccSHideki Saito static const unsigned MaxInterleaveFactor = 16;
53f2ec16ccSHideki Saito 
54f2ec16ccSHideki Saito namespace llvm {
55f2ec16ccSHideki Saito 
56f2ec16ccSHideki Saito bool LoopVectorizeHints::Hint::validate(unsigned Val) {
57f2ec16ccSHideki Saito   switch (Kind) {
58f2ec16ccSHideki Saito   case HK_WIDTH:
59f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
60f2ec16ccSHideki Saito   case HK_UNROLL:
61f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
62f2ec16ccSHideki Saito   case HK_FORCE:
63f2ec16ccSHideki Saito     return (Val <= 1);
64f2ec16ccSHideki Saito   case HK_ISVECTORIZED:
6520b198ecSSjoerd Meijer   case HK_PREDICATE:
66f2ec16ccSHideki Saito     return (Val == 0 || Val == 1);
67f2ec16ccSHideki Saito   }
68f2ec16ccSHideki Saito   return false;
69f2ec16ccSHideki Saito }
70f2ec16ccSHideki Saito 
71d4eb13c8SMichael Kruse LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
72d4eb13c8SMichael Kruse                                        bool InterleaveOnlyWhenForced,
73f2ec16ccSHideki Saito                                        OptimizationRemarkEmitter &ORE)
74f2ec16ccSHideki Saito     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
75d4eb13c8SMichael Kruse       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
76f2ec16ccSHideki Saito       Force("vectorize.enable", FK_Undefined, HK_FORCE),
7720b198ecSSjoerd Meijer       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
78cb47b878SSjoerd Meijer       Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
7920b198ecSSjoerd Meijer       ORE(ORE) {
80f2ec16ccSHideki Saito   // Populate values with existing loop metadata.
81f2ec16ccSHideki Saito   getHintsFromMetadata();
82f2ec16ccSHideki Saito 
83f2ec16ccSHideki Saito   // force-vector-interleave overrides DisableInterleaving.
84f2ec16ccSHideki Saito   if (VectorizerParams::isInterleaveForced())
85f2ec16ccSHideki Saito     Interleave.Value = VectorizerParams::VectorizationInterleave;
86f2ec16ccSHideki Saito 
87f2ec16ccSHideki Saito   if (IsVectorized.Value != 1)
88f2ec16ccSHideki Saito     // If the vectorization width and interleaving count are both 1 then
89f2ec16ccSHideki Saito     // consider the loop to have been already vectorized because there's
90f2ec16ccSHideki Saito     // nothing more that we can do.
91f2ec16ccSHideki Saito     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
92d4eb13c8SMichael Kruse   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
93f2ec16ccSHideki Saito              << "LV: Interleaving disabled by the pass manager\n");
94f2ec16ccSHideki Saito }
95f2ec16ccSHideki Saito 
9677a614a6SMichael Kruse void LoopVectorizeHints::setAlreadyVectorized() {
9777a614a6SMichael Kruse   LLVMContext &Context = TheLoop->getHeader()->getContext();
9877a614a6SMichael Kruse 
9977a614a6SMichael Kruse   MDNode *IsVectorizedMD = MDNode::get(
10077a614a6SMichael Kruse       Context,
10177a614a6SMichael Kruse       {MDString::get(Context, "llvm.loop.isvectorized"),
10277a614a6SMichael Kruse        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
10377a614a6SMichael Kruse   MDNode *LoopID = TheLoop->getLoopID();
10477a614a6SMichael Kruse   MDNode *NewLoopID =
10577a614a6SMichael Kruse       makePostTransformationMetadata(Context, LoopID,
10677a614a6SMichael Kruse                                      {Twine(Prefix(), "vectorize.").str(),
10777a614a6SMichael Kruse                                       Twine(Prefix(), "interleave.").str()},
10877a614a6SMichael Kruse                                      {IsVectorizedMD});
10977a614a6SMichael Kruse   TheLoop->setLoopID(NewLoopID);
11077a614a6SMichael Kruse 
11177a614a6SMichael Kruse   // Update internal cache.
11277a614a6SMichael Kruse   IsVectorized.Value = 1;
11377a614a6SMichael Kruse }
11477a614a6SMichael Kruse 
115d4eb13c8SMichael Kruse bool LoopVectorizeHints::allowVectorization(
116d4eb13c8SMichael Kruse     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
117f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled) {
118d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
119f2ec16ccSHideki Saito     emitRemarkWithHints();
120f2ec16ccSHideki Saito     return false;
121f2ec16ccSHideki Saito   }
122f2ec16ccSHideki Saito 
123d4eb13c8SMichael Kruse   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
124d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
125f2ec16ccSHideki Saito     emitRemarkWithHints();
126f2ec16ccSHideki Saito     return false;
127f2ec16ccSHideki Saito   }
128f2ec16ccSHideki Saito 
129f2ec16ccSHideki Saito   if (getIsVectorized() == 1) {
130d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
131f2ec16ccSHideki Saito     // FIXME: Add interleave.disable metadata. This will allow
132f2ec16ccSHideki Saito     // vectorize.disable to be used without disabling the pass and errors
133f2ec16ccSHideki Saito     // to differentiate between disabled vectorization and a width of 1.
134f2ec16ccSHideki Saito     ORE.emit([&]() {
135f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
136f2ec16ccSHideki Saito                                         "AllDisabled", L->getStartLoc(),
137f2ec16ccSHideki Saito                                         L->getHeader())
138f2ec16ccSHideki Saito              << "loop not vectorized: vectorization and interleaving are "
139f2ec16ccSHideki Saito                 "explicitly disabled, or the loop has already been "
140f2ec16ccSHideki Saito                 "vectorized";
141f2ec16ccSHideki Saito     });
142f2ec16ccSHideki Saito     return false;
143f2ec16ccSHideki Saito   }
144f2ec16ccSHideki Saito 
145f2ec16ccSHideki Saito   return true;
146f2ec16ccSHideki Saito }
147f2ec16ccSHideki Saito 
148f2ec16ccSHideki Saito void LoopVectorizeHints::emitRemarkWithHints() const {
149f2ec16ccSHideki Saito   using namespace ore;
150f2ec16ccSHideki Saito 
151f2ec16ccSHideki Saito   ORE.emit([&]() {
152f2ec16ccSHideki Saito     if (Force.Value == LoopVectorizeHints::FK_Disabled)
153f2ec16ccSHideki Saito       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
154f2ec16ccSHideki Saito                                       TheLoop->getStartLoc(),
155f2ec16ccSHideki Saito                                       TheLoop->getHeader())
156f2ec16ccSHideki Saito              << "loop not vectorized: vectorization is explicitly disabled";
157f2ec16ccSHideki Saito     else {
158f2ec16ccSHideki Saito       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
159f2ec16ccSHideki Saito                                  TheLoop->getStartLoc(), TheLoop->getHeader());
160f2ec16ccSHideki Saito       R << "loop not vectorized";
161f2ec16ccSHideki Saito       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
162f2ec16ccSHideki Saito         R << " (Force=" << NV("Force", true);
163f2ec16ccSHideki Saito         if (Width.Value != 0)
164f2ec16ccSHideki Saito           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
165f2ec16ccSHideki Saito         if (Interleave.Value != 0)
166f2ec16ccSHideki Saito           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
167f2ec16ccSHideki Saito         R << ")";
168f2ec16ccSHideki Saito       }
169f2ec16ccSHideki Saito       return R;
170f2ec16ccSHideki Saito     }
171f2ec16ccSHideki Saito   });
172f2ec16ccSHideki Saito }
173f2ec16ccSHideki Saito 
174f2ec16ccSHideki Saito const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
175f2ec16ccSHideki Saito   if (getWidth() == 1)
176f2ec16ccSHideki Saito     return LV_NAME;
177f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled)
178f2ec16ccSHideki Saito     return LV_NAME;
179f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
180f2ec16ccSHideki Saito     return LV_NAME;
181f2ec16ccSHideki Saito   return OptimizationRemarkAnalysis::AlwaysPrint;
182f2ec16ccSHideki Saito }
183f2ec16ccSHideki Saito 
184f2ec16ccSHideki Saito void LoopVectorizeHints::getHintsFromMetadata() {
185f2ec16ccSHideki Saito   MDNode *LoopID = TheLoop->getLoopID();
186f2ec16ccSHideki Saito   if (!LoopID)
187f2ec16ccSHideki Saito     return;
188f2ec16ccSHideki Saito 
189f2ec16ccSHideki Saito   // First operand should refer to the loop id itself.
190f2ec16ccSHideki Saito   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
191f2ec16ccSHideki Saito   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
192f2ec16ccSHideki Saito 
193f2ec16ccSHideki Saito   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
194f2ec16ccSHideki Saito     const MDString *S = nullptr;
195f2ec16ccSHideki Saito     SmallVector<Metadata *, 4> Args;
196f2ec16ccSHideki Saito 
197f2ec16ccSHideki Saito     // The expected hint is either a MDString or a MDNode with the first
198f2ec16ccSHideki Saito     // operand a MDString.
199f2ec16ccSHideki Saito     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
200f2ec16ccSHideki Saito       if (!MD || MD->getNumOperands() == 0)
201f2ec16ccSHideki Saito         continue;
202f2ec16ccSHideki Saito       S = dyn_cast<MDString>(MD->getOperand(0));
203f2ec16ccSHideki Saito       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
204f2ec16ccSHideki Saito         Args.push_back(MD->getOperand(i));
205f2ec16ccSHideki Saito     } else {
206f2ec16ccSHideki Saito       S = dyn_cast<MDString>(LoopID->getOperand(i));
207f2ec16ccSHideki Saito       assert(Args.size() == 0 && "too many arguments for MDString");
208f2ec16ccSHideki Saito     }
209f2ec16ccSHideki Saito 
210f2ec16ccSHideki Saito     if (!S)
211f2ec16ccSHideki Saito       continue;
212f2ec16ccSHideki Saito 
213f2ec16ccSHideki Saito     // Check if the hint starts with the loop metadata prefix.
214f2ec16ccSHideki Saito     StringRef Name = S->getString();
215f2ec16ccSHideki Saito     if (Args.size() == 1)
216f2ec16ccSHideki Saito       setHint(Name, Args[0]);
217f2ec16ccSHideki Saito   }
218f2ec16ccSHideki Saito }
219f2ec16ccSHideki Saito 
220f2ec16ccSHideki Saito void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
221f2ec16ccSHideki Saito   if (!Name.startswith(Prefix()))
222f2ec16ccSHideki Saito     return;
223f2ec16ccSHideki Saito   Name = Name.substr(Prefix().size(), StringRef::npos);
224f2ec16ccSHideki Saito 
225f2ec16ccSHideki Saito   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
226f2ec16ccSHideki Saito   if (!C)
227f2ec16ccSHideki Saito     return;
228f2ec16ccSHideki Saito   unsigned Val = C->getZExtValue();
229f2ec16ccSHideki Saito 
23020b198ecSSjoerd Meijer   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
231f2ec16ccSHideki Saito   for (auto H : Hints) {
232f2ec16ccSHideki Saito     if (Name == H->Name) {
233f2ec16ccSHideki Saito       if (H->validate(Val))
234f2ec16ccSHideki Saito         H->Value = Val;
235f2ec16ccSHideki Saito       else
236d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
237f2ec16ccSHideki Saito       break;
238f2ec16ccSHideki Saito     }
239f2ec16ccSHideki Saito   }
240f2ec16ccSHideki Saito }
241f2ec16ccSHideki Saito 
242f2ec16ccSHideki Saito bool LoopVectorizationRequirements::doesNotMeet(
243f2ec16ccSHideki Saito     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
244f2ec16ccSHideki Saito   const char *PassName = Hints.vectorizeAnalysisPassName();
245f2ec16ccSHideki Saito   bool Failed = false;
246f2ec16ccSHideki Saito   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
247f2ec16ccSHideki Saito     ORE.emit([&]() {
248f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisFPCommute(
249f2ec16ccSHideki Saito                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
250f2ec16ccSHideki Saito                  UnsafeAlgebraInst->getParent())
251f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
252f2ec16ccSHideki Saito                 "floating-point operations";
253f2ec16ccSHideki Saito     });
254f2ec16ccSHideki Saito     Failed = true;
255f2ec16ccSHideki Saito   }
256f2ec16ccSHideki Saito 
257f2ec16ccSHideki Saito   // Test if runtime memcheck thresholds are exceeded.
258f2ec16ccSHideki Saito   bool PragmaThresholdReached =
259f2ec16ccSHideki Saito       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
260f2ec16ccSHideki Saito   bool ThresholdReached =
261f2ec16ccSHideki Saito       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
262f2ec16ccSHideki Saito   if ((ThresholdReached && !Hints.allowReordering()) ||
263f2ec16ccSHideki Saito       PragmaThresholdReached) {
264f2ec16ccSHideki Saito     ORE.emit([&]() {
265f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
266f2ec16ccSHideki Saito                                                 L->getStartLoc(),
267f2ec16ccSHideki Saito                                                 L->getHeader())
268f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
269f2ec16ccSHideki Saito                 "memory operations";
270f2ec16ccSHideki Saito     });
271d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
272f2ec16ccSHideki Saito     Failed = true;
273f2ec16ccSHideki Saito   }
274f2ec16ccSHideki Saito 
275f2ec16ccSHideki Saito   return Failed;
276f2ec16ccSHideki Saito }
277f2ec16ccSHideki Saito 
278f2ec16ccSHideki Saito // Return true if the inner loop \p Lp is uniform with regard to the outer loop
279f2ec16ccSHideki Saito // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
280f2ec16ccSHideki Saito // executing the inner loop will execute the same iterations). This check is
281f2ec16ccSHideki Saito // very constrained for now but it will be relaxed in the future. \p Lp is
282f2ec16ccSHideki Saito // considered uniform if it meets all the following conditions:
283f2ec16ccSHideki Saito //   1) it has a canonical IV (starting from 0 and with stride 1),
284f2ec16ccSHideki Saito //   2) its latch terminator is a conditional branch and,
285f2ec16ccSHideki Saito //   3) its latch condition is a compare instruction whose operands are the
286f2ec16ccSHideki Saito //      canonical IV and an OuterLp invariant.
287f2ec16ccSHideki Saito // This check doesn't take into account the uniformity of other conditions not
288f2ec16ccSHideki Saito // related to the loop latch because they don't affect the loop uniformity.
289f2ec16ccSHideki Saito //
290f2ec16ccSHideki Saito // NOTE: We decided to keep all these checks and its associated documentation
291f2ec16ccSHideki Saito // together so that we can easily have a picture of the current supported loop
292f2ec16ccSHideki Saito // nests. However, some of the current checks don't depend on \p OuterLp and
293f2ec16ccSHideki Saito // would be redundantly executed for each \p Lp if we invoked this function for
294f2ec16ccSHideki Saito // different candidate outer loops. This is not the case for now because we
295f2ec16ccSHideki Saito // don't currently have the infrastructure to evaluate multiple candidate outer
296f2ec16ccSHideki Saito // loops and \p OuterLp will be a fixed parameter while we only support explicit
297f2ec16ccSHideki Saito // outer loop vectorization. It's also very likely that these checks go away
298f2ec16ccSHideki Saito // before introducing the aforementioned infrastructure. However, if this is not
299f2ec16ccSHideki Saito // the case, we should move the \p OuterLp independent checks to a separate
300f2ec16ccSHideki Saito // function that is only executed once for each \p Lp.
301f2ec16ccSHideki Saito static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
302f2ec16ccSHideki Saito   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
303f2ec16ccSHideki Saito 
304f2ec16ccSHideki Saito   // If Lp is the outer loop, it's uniform by definition.
305f2ec16ccSHideki Saito   if (Lp == OuterLp)
306f2ec16ccSHideki Saito     return true;
307f2ec16ccSHideki Saito   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
308f2ec16ccSHideki Saito 
309f2ec16ccSHideki Saito   // 1.
310f2ec16ccSHideki Saito   PHINode *IV = Lp->getCanonicalInductionVariable();
311f2ec16ccSHideki Saito   if (!IV) {
312d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
313f2ec16ccSHideki Saito     return false;
314f2ec16ccSHideki Saito   }
315f2ec16ccSHideki Saito 
316f2ec16ccSHideki Saito   // 2.
317f2ec16ccSHideki Saito   BasicBlock *Latch = Lp->getLoopLatch();
318f2ec16ccSHideki Saito   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
319f2ec16ccSHideki Saito   if (!LatchBr || LatchBr->isUnconditional()) {
320d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
321f2ec16ccSHideki Saito     return false;
322f2ec16ccSHideki Saito   }
323f2ec16ccSHideki Saito 
324f2ec16ccSHideki Saito   // 3.
325f2ec16ccSHideki Saito   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
326f2ec16ccSHideki Saito   if (!LatchCmp) {
327d34e60caSNicola Zaghen     LLVM_DEBUG(
328d34e60caSNicola Zaghen         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
329f2ec16ccSHideki Saito     return false;
330f2ec16ccSHideki Saito   }
331f2ec16ccSHideki Saito 
332f2ec16ccSHideki Saito   Value *CondOp0 = LatchCmp->getOperand(0);
333f2ec16ccSHideki Saito   Value *CondOp1 = LatchCmp->getOperand(1);
334f2ec16ccSHideki Saito   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
335f2ec16ccSHideki Saito   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
336f2ec16ccSHideki Saito       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
337d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
338f2ec16ccSHideki Saito     return false;
339f2ec16ccSHideki Saito   }
340f2ec16ccSHideki Saito 
341f2ec16ccSHideki Saito   return true;
342f2ec16ccSHideki Saito }
343f2ec16ccSHideki Saito 
344f2ec16ccSHideki Saito // Return true if \p Lp and all its nested loops are uniform with regard to \p
345f2ec16ccSHideki Saito // OuterLp.
346f2ec16ccSHideki Saito static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
347f2ec16ccSHideki Saito   if (!isUniformLoop(Lp, OuterLp))
348f2ec16ccSHideki Saito     return false;
349f2ec16ccSHideki Saito 
350f2ec16ccSHideki Saito   // Check if nested loops are uniform.
351f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
352f2ec16ccSHideki Saito     if (!isUniformLoopNest(SubLp, OuterLp))
353f2ec16ccSHideki Saito       return false;
354f2ec16ccSHideki Saito 
355f2ec16ccSHideki Saito   return true;
356f2ec16ccSHideki Saito }
357f2ec16ccSHideki Saito 
3585f8f34e4SAdrian Prantl /// Check whether it is safe to if-convert this phi node.
359f2ec16ccSHideki Saito ///
360f2ec16ccSHideki Saito /// Phi nodes with constant expressions that can trap are not safe to if
361f2ec16ccSHideki Saito /// convert.
362f2ec16ccSHideki Saito static bool canIfConvertPHINodes(BasicBlock *BB) {
363f2ec16ccSHideki Saito   for (PHINode &Phi : BB->phis()) {
364f2ec16ccSHideki Saito     for (Value *V : Phi.incoming_values())
365f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(V))
366f2ec16ccSHideki Saito         if (C->canTrap())
367f2ec16ccSHideki Saito           return false;
368f2ec16ccSHideki Saito   }
369f2ec16ccSHideki Saito   return true;
370f2ec16ccSHideki Saito }
371f2ec16ccSHideki Saito 
372f2ec16ccSHideki Saito static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
373f2ec16ccSHideki Saito   if (Ty->isPointerTy())
374f2ec16ccSHideki Saito     return DL.getIntPtrType(Ty);
375f2ec16ccSHideki Saito 
376f2ec16ccSHideki Saito   // It is possible that char's or short's overflow when we ask for the loop's
377f2ec16ccSHideki Saito   // trip count, work around this by changing the type size.
378f2ec16ccSHideki Saito   if (Ty->getScalarSizeInBits() < 32)
379f2ec16ccSHideki Saito     return Type::getInt32Ty(Ty->getContext());
380f2ec16ccSHideki Saito 
381f2ec16ccSHideki Saito   return Ty;
382f2ec16ccSHideki Saito }
383f2ec16ccSHideki Saito 
384f2ec16ccSHideki Saito static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
385f2ec16ccSHideki Saito   Ty0 = convertPointerToIntegerType(DL, Ty0);
386f2ec16ccSHideki Saito   Ty1 = convertPointerToIntegerType(DL, Ty1);
387f2ec16ccSHideki Saito   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
388f2ec16ccSHideki Saito     return Ty0;
389f2ec16ccSHideki Saito   return Ty1;
390f2ec16ccSHideki Saito }
391f2ec16ccSHideki Saito 
3925f8f34e4SAdrian Prantl /// Check that the instruction has outside loop users and is not an
393f2ec16ccSHideki Saito /// identified reduction variable.
394f2ec16ccSHideki Saito static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
395f2ec16ccSHideki Saito                                SmallPtrSetImpl<Value *> &AllowedExit) {
39660a1e4ddSAnna Thomas   // Reductions, Inductions and non-header phis are allowed to have exit users. All
397f2ec16ccSHideki Saito   // other instructions must not have external users.
398f2ec16ccSHideki Saito   if (!AllowedExit.count(Inst))
399f2ec16ccSHideki Saito     // Check that all of the users of the loop are inside the BB.
400f2ec16ccSHideki Saito     for (User *U : Inst->users()) {
401f2ec16ccSHideki Saito       Instruction *UI = cast<Instruction>(U);
402f2ec16ccSHideki Saito       // This user may be a reduction exit value.
403f2ec16ccSHideki Saito       if (!TheLoop->contains(UI)) {
404d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
405f2ec16ccSHideki Saito         return true;
406f2ec16ccSHideki Saito       }
407f2ec16ccSHideki Saito     }
408f2ec16ccSHideki Saito   return false;
409f2ec16ccSHideki Saito }
410f2ec16ccSHideki Saito 
411f2ec16ccSHideki Saito int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
412f2ec16ccSHideki Saito   const ValueToValueMap &Strides =
413f2ec16ccSHideki Saito       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
414f2ec16ccSHideki Saito 
415d1170dbeSSjoerd Meijer   bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize();
416d1170dbeSSjoerd Meijer   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
417f2ec16ccSHideki Saito   if (Stride == 1 || Stride == -1)
418f2ec16ccSHideki Saito     return Stride;
419f2ec16ccSHideki Saito   return 0;
420f2ec16ccSHideki Saito }
421f2ec16ccSHideki Saito 
422f2ec16ccSHideki Saito bool LoopVectorizationLegality::isUniform(Value *V) {
423f2ec16ccSHideki Saito   return LAI->isUniform(V);
424f2ec16ccSHideki Saito }
425f2ec16ccSHideki Saito 
426f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeOuterLoop() {
427f2ec16ccSHideki Saito   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
428f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
429f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
430f2ec16ccSHideki Saito   bool Result = true;
431f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
432f2ec16ccSHideki Saito 
433f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
434f2ec16ccSHideki Saito     // Check whether the BB terminator is a BranchInst. Any other terminator is
435f2ec16ccSHideki Saito     // not supported yet.
436f2ec16ccSHideki Saito     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
437f2ec16ccSHideki Saito     if (!Br) {
4389e97caf5SRenato Golin       reportVectorizationFailure("Unsupported basic block terminator",
4399e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
440ec818d7fSHideki Saito           "CFGNotUnderstood", ORE, TheLoop);
441f2ec16ccSHideki Saito       if (DoExtraAnalysis)
442f2ec16ccSHideki Saito         Result = false;
443f2ec16ccSHideki Saito       else
444f2ec16ccSHideki Saito         return false;
445f2ec16ccSHideki Saito     }
446f2ec16ccSHideki Saito 
447f2ec16ccSHideki Saito     // Check whether the BranchInst is a supported one. Only unconditional
448f2ec16ccSHideki Saito     // branches, conditional branches with an outer loop invariant condition or
449f2ec16ccSHideki Saito     // backedges are supported.
4504e4ecae0SHideki Saito     // FIXME: We skip these checks when VPlan predication is enabled as we
4514e4ecae0SHideki Saito     // want to allow divergent branches. This whole check will be removed
4524e4ecae0SHideki Saito     // once VPlan predication is on by default.
4534e4ecae0SHideki Saito     if (!EnableVPlanPredication && Br && Br->isConditional() &&
454f2ec16ccSHideki Saito         !TheLoop->isLoopInvariant(Br->getCondition()) &&
455f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(0)) &&
456f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(1))) {
4579e97caf5SRenato Golin       reportVectorizationFailure("Unsupported conditional branch",
4589e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
459ec818d7fSHideki Saito           "CFGNotUnderstood", ORE, TheLoop);
460f2ec16ccSHideki Saito       if (DoExtraAnalysis)
461f2ec16ccSHideki Saito         Result = false;
462f2ec16ccSHideki Saito       else
463f2ec16ccSHideki Saito         return false;
464f2ec16ccSHideki Saito     }
465f2ec16ccSHideki Saito   }
466f2ec16ccSHideki Saito 
467f2ec16ccSHideki Saito   // Check whether inner loops are uniform. At this point, we only support
468f2ec16ccSHideki Saito   // simple outer loops scenarios with uniform nested loops.
469f2ec16ccSHideki Saito   if (!isUniformLoopNest(TheLoop /*loop nest*/,
470f2ec16ccSHideki Saito                          TheLoop /*context outer loop*/)) {
4719e97caf5SRenato Golin     reportVectorizationFailure("Outer loop contains divergent loops",
4729e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
473ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
474f2ec16ccSHideki Saito     if (DoExtraAnalysis)
475f2ec16ccSHideki Saito       Result = false;
476f2ec16ccSHideki Saito     else
477f2ec16ccSHideki Saito       return false;
478f2ec16ccSHideki Saito   }
479f2ec16ccSHideki Saito 
480ea7f3035SHideki Saito   // Check whether we are able to set up outer loop induction.
481ea7f3035SHideki Saito   if (!setupOuterLoopInductions()) {
4829e97caf5SRenato Golin     reportVectorizationFailure("Unsupported outer loop Phi(s)",
4839e97caf5SRenato Golin                                "Unsupported outer loop Phi(s)",
484ec818d7fSHideki Saito                                "UnsupportedPhi", ORE, TheLoop);
485ea7f3035SHideki Saito     if (DoExtraAnalysis)
486ea7f3035SHideki Saito       Result = false;
487ea7f3035SHideki Saito     else
488ea7f3035SHideki Saito       return false;
489ea7f3035SHideki Saito   }
490ea7f3035SHideki Saito 
491f2ec16ccSHideki Saito   return Result;
492f2ec16ccSHideki Saito }
493f2ec16ccSHideki Saito 
494f2ec16ccSHideki Saito void LoopVectorizationLegality::addInductionPhi(
495f2ec16ccSHideki Saito     PHINode *Phi, const InductionDescriptor &ID,
496f2ec16ccSHideki Saito     SmallPtrSetImpl<Value *> &AllowedExit) {
497f2ec16ccSHideki Saito   Inductions[Phi] = ID;
498f2ec16ccSHideki Saito 
499f2ec16ccSHideki Saito   // In case this induction also comes with casts that we know we can ignore
500f2ec16ccSHideki Saito   // in the vectorized loop body, record them here. All casts could be recorded
501f2ec16ccSHideki Saito   // here for ignoring, but suffices to record only the first (as it is the
502f2ec16ccSHideki Saito   // only one that may bw used outside the cast sequence).
503f2ec16ccSHideki Saito   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
504f2ec16ccSHideki Saito   if (!Casts.empty())
505f2ec16ccSHideki Saito     InductionCastsToIgnore.insert(*Casts.begin());
506f2ec16ccSHideki Saito 
507f2ec16ccSHideki Saito   Type *PhiTy = Phi->getType();
508f2ec16ccSHideki Saito   const DataLayout &DL = Phi->getModule()->getDataLayout();
509f2ec16ccSHideki Saito 
510f2ec16ccSHideki Saito   // Get the widest type.
511f2ec16ccSHideki Saito   if (!PhiTy->isFloatingPointTy()) {
512f2ec16ccSHideki Saito     if (!WidestIndTy)
513f2ec16ccSHideki Saito       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
514f2ec16ccSHideki Saito     else
515f2ec16ccSHideki Saito       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
516f2ec16ccSHideki Saito   }
517f2ec16ccSHideki Saito 
518f2ec16ccSHideki Saito   // Int inductions are special because we only allow one IV.
519f2ec16ccSHideki Saito   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
520f2ec16ccSHideki Saito       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
521f2ec16ccSHideki Saito       isa<Constant>(ID.getStartValue()) &&
522f2ec16ccSHideki Saito       cast<Constant>(ID.getStartValue())->isNullValue()) {
523f2ec16ccSHideki Saito 
524f2ec16ccSHideki Saito     // Use the phi node with the widest type as induction. Use the last
525f2ec16ccSHideki Saito     // one if there are multiple (no good reason for doing this other
526f2ec16ccSHideki Saito     // than it is expedient). We've checked that it begins at zero and
527f2ec16ccSHideki Saito     // steps by one, so this is a canonical induction variable.
528f2ec16ccSHideki Saito     if (!PrimaryInduction || PhiTy == WidestIndTy)
529f2ec16ccSHideki Saito       PrimaryInduction = Phi;
530f2ec16ccSHideki Saito   }
531f2ec16ccSHideki Saito 
532f2ec16ccSHideki Saito   // Both the PHI node itself, and the "post-increment" value feeding
533f2ec16ccSHideki Saito   // back into the PHI node may have external users.
534f2ec16ccSHideki Saito   // We can allow those uses, except if the SCEVs we have for them rely
535f2ec16ccSHideki Saito   // on predicates that only hold within the loop, since allowing the exit
5366a1dd77fSAnna Thomas   // currently means re-using this SCEV outside the loop (see PR33706 for more
5376a1dd77fSAnna Thomas   // details).
538f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().isAlwaysTrue()) {
539f2ec16ccSHideki Saito     AllowedExit.insert(Phi);
540f2ec16ccSHideki Saito     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
541f2ec16ccSHideki Saito   }
542f2ec16ccSHideki Saito 
543d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
544f2ec16ccSHideki Saito }
545f2ec16ccSHideki Saito 
546ea7f3035SHideki Saito bool LoopVectorizationLegality::setupOuterLoopInductions() {
547ea7f3035SHideki Saito   BasicBlock *Header = TheLoop->getHeader();
548ea7f3035SHideki Saito 
549ea7f3035SHideki Saito   // Returns true if a given Phi is a supported induction.
550ea7f3035SHideki Saito   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
551ea7f3035SHideki Saito     InductionDescriptor ID;
552ea7f3035SHideki Saito     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
553ea7f3035SHideki Saito         ID.getKind() == InductionDescriptor::IK_IntInduction) {
554ea7f3035SHideki Saito       addInductionPhi(&Phi, ID, AllowedExit);
555ea7f3035SHideki Saito       return true;
556ea7f3035SHideki Saito     } else {
557ea7f3035SHideki Saito       // Bail out for any Phi in the outer loop header that is not a supported
558ea7f3035SHideki Saito       // induction.
559ea7f3035SHideki Saito       LLVM_DEBUG(
560ea7f3035SHideki Saito           dbgs()
561ea7f3035SHideki Saito           << "LV: Found unsupported PHI for outer loop vectorization.\n");
562ea7f3035SHideki Saito       return false;
563ea7f3035SHideki Saito     }
564ea7f3035SHideki Saito   };
565ea7f3035SHideki Saito 
566ea7f3035SHideki Saito   if (llvm::all_of(Header->phis(), isSupportedPhi))
567ea7f3035SHideki Saito     return true;
568ea7f3035SHideki Saito   else
569ea7f3035SHideki Saito     return false;
570ea7f3035SHideki Saito }
571ea7f3035SHideki Saito 
57266c120f0SFrancesco Petrogalli /// Checks if a function is scalarizable according to the TLI, in
57366c120f0SFrancesco Petrogalli /// the sense that it should be vectorized and then expanded in
57466c120f0SFrancesco Petrogalli /// multiple scalarcalls. This is represented in the
57566c120f0SFrancesco Petrogalli /// TLI via mappings that do not specify a vector name, as in the
57666c120f0SFrancesco Petrogalli /// following example:
57766c120f0SFrancesco Petrogalli ///
57866c120f0SFrancesco Petrogalli ///    const VecDesc VecIntrinsics[] = {
57966c120f0SFrancesco Petrogalli ///      {"llvm.phx.abs.i32", "", 4}
58066c120f0SFrancesco Petrogalli ///    };
58166c120f0SFrancesco Petrogalli static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
58266c120f0SFrancesco Petrogalli   const StringRef ScalarName = CI.getCalledFunction()->getName();
58366c120f0SFrancesco Petrogalli   bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
58466c120f0SFrancesco Petrogalli   // Check that all known VFs are not associated to a vector
58566c120f0SFrancesco Petrogalli   // function, i.e. the vector name is emty.
58666c120f0SFrancesco Petrogalli   if (Scalarize)
58766c120f0SFrancesco Petrogalli     for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName);
58866c120f0SFrancesco Petrogalli          VF <= WidestVF; VF *= 2) {
58966c120f0SFrancesco Petrogalli       Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
59066c120f0SFrancesco Petrogalli     }
59166c120f0SFrancesco Petrogalli   return Scalarize;
59266c120f0SFrancesco Petrogalli }
59366c120f0SFrancesco Petrogalli 
594f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeInstrs() {
595f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
596f2ec16ccSHideki Saito 
597f2ec16ccSHideki Saito   // Look for the attribute signaling the absence of NaNs.
598f2ec16ccSHideki Saito   Function &F = *Header->getParent();
599f2ec16ccSHideki Saito   HasFunNoNaNAttr =
600f2ec16ccSHideki Saito       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
601f2ec16ccSHideki Saito 
602f2ec16ccSHideki Saito   // For each block in the loop.
603f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
604f2ec16ccSHideki Saito     // Scan the instructions in the block and look for hazards.
605f2ec16ccSHideki Saito     for (Instruction &I : *BB) {
606f2ec16ccSHideki Saito       if (auto *Phi = dyn_cast<PHINode>(&I)) {
607f2ec16ccSHideki Saito         Type *PhiTy = Phi->getType();
608f2ec16ccSHideki Saito         // Check that this PHI type is allowed.
609f2ec16ccSHideki Saito         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
610f2ec16ccSHideki Saito             !PhiTy->isPointerTy()) {
6119e97caf5SRenato Golin           reportVectorizationFailure("Found a non-int non-pointer PHI",
6129e97caf5SRenato Golin                                      "loop control flow is not understood by vectorizer",
613ec818d7fSHideki Saito                                      "CFGNotUnderstood", ORE, TheLoop);
614f2ec16ccSHideki Saito           return false;
615f2ec16ccSHideki Saito         }
616f2ec16ccSHideki Saito 
617f2ec16ccSHideki Saito         // If this PHINode is not in the header block, then we know that we
618f2ec16ccSHideki Saito         // can convert it to select during if-conversion. No need to check if
619f2ec16ccSHideki Saito         // the PHIs in this block are induction or reduction variables.
620f2ec16ccSHideki Saito         if (BB != Header) {
62160a1e4ddSAnna Thomas           // Non-header phi nodes that have outside uses can be vectorized. Add
62260a1e4ddSAnna Thomas           // them to the list of allowed exits.
62360a1e4ddSAnna Thomas           // Unsafe cyclic dependencies with header phis are identified during
62460a1e4ddSAnna Thomas           // legalization for reduction, induction and first order
62560a1e4ddSAnna Thomas           // recurrences.
626dd18ce45SBjorn Pettersson           AllowedExit.insert(&I);
627f2ec16ccSHideki Saito           continue;
628f2ec16ccSHideki Saito         }
629f2ec16ccSHideki Saito 
630f2ec16ccSHideki Saito         // We only allow if-converted PHIs with exactly two incoming values.
631f2ec16ccSHideki Saito         if (Phi->getNumIncomingValues() != 2) {
6329e97caf5SRenato Golin           reportVectorizationFailure("Found an invalid PHI",
6339e97caf5SRenato Golin               "loop control flow is not understood by vectorizer",
634ec818d7fSHideki Saito               "CFGNotUnderstood", ORE, TheLoop, Phi);
635f2ec16ccSHideki Saito           return false;
636f2ec16ccSHideki Saito         }
637f2ec16ccSHideki Saito 
638f2ec16ccSHideki Saito         RecurrenceDescriptor RedDes;
639f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
640f2ec16ccSHideki Saito                                                  DT)) {
641f2ec16ccSHideki Saito           if (RedDes.hasUnsafeAlgebra())
642f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
643f2ec16ccSHideki Saito           AllowedExit.insert(RedDes.getLoopExitInstr());
644f2ec16ccSHideki Saito           Reductions[Phi] = RedDes;
645f2ec16ccSHideki Saito           continue;
646f2ec16ccSHideki Saito         }
647f2ec16ccSHideki Saito 
648b02b0ad8SAnna Thomas         // TODO: Instead of recording the AllowedExit, it would be good to record the
649b02b0ad8SAnna Thomas         // complementary set: NotAllowedExit. These include (but may not be
650b02b0ad8SAnna Thomas         // limited to):
651b02b0ad8SAnna Thomas         // 1. Reduction phis as they represent the one-before-last value, which
652b02b0ad8SAnna Thomas         // is not available when vectorized
653b02b0ad8SAnna Thomas         // 2. Induction phis and increment when SCEV predicates cannot be used
654b02b0ad8SAnna Thomas         // outside the loop - see addInductionPhi
655b02b0ad8SAnna Thomas         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
656b02b0ad8SAnna Thomas         // outside the loop - see call to hasOutsideLoopUser in the non-phi
657b02b0ad8SAnna Thomas         // handling below
658b02b0ad8SAnna Thomas         // 4. FirstOrderRecurrence phis that can possibly be handled by
659b02b0ad8SAnna Thomas         // extraction.
660b02b0ad8SAnna Thomas         // By recording these, we can then reason about ways to vectorize each
661b02b0ad8SAnna Thomas         // of these NotAllowedExit.
662f2ec16ccSHideki Saito         InductionDescriptor ID;
663f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
664f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
665f2ec16ccSHideki Saito           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
666f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
667f2ec16ccSHideki Saito           continue;
668f2ec16ccSHideki Saito         }
669f2ec16ccSHideki Saito 
670f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
671f2ec16ccSHideki Saito                                                          SinkAfter, DT)) {
6728e0c5f72SAyal Zaks           AllowedExit.insert(Phi);
673f2ec16ccSHideki Saito           FirstOrderRecurrences.insert(Phi);
674f2ec16ccSHideki Saito           continue;
675f2ec16ccSHideki Saito         }
676f2ec16ccSHideki Saito 
677f2ec16ccSHideki Saito         // As a last resort, coerce the PHI to a AddRec expression
678f2ec16ccSHideki Saito         // and re-try classifying it a an induction PHI.
679f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
680f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
681f2ec16ccSHideki Saito           continue;
682f2ec16ccSHideki Saito         }
683f2ec16ccSHideki Saito 
6849e97caf5SRenato Golin         reportVectorizationFailure("Found an unidentified PHI",
6859e97caf5SRenato Golin             "value that could not be identified as "
6869e97caf5SRenato Golin             "reduction is used outside the loop",
687ec818d7fSHideki Saito             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
688f2ec16ccSHideki Saito         return false;
689f2ec16ccSHideki Saito       } // end of PHI handling
690f2ec16ccSHideki Saito 
691f2ec16ccSHideki Saito       // We handle calls that:
692f2ec16ccSHideki Saito       //   * Are debug info intrinsics.
693f2ec16ccSHideki Saito       //   * Have a mapping to an IR intrinsic.
694f2ec16ccSHideki Saito       //   * Have a vector version available.
695f2ec16ccSHideki Saito       auto *CI = dyn_cast<CallInst>(&I);
69666c120f0SFrancesco Petrogalli 
697f2ec16ccSHideki Saito       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
698f2ec16ccSHideki Saito           !isa<DbgInfoIntrinsic>(CI) &&
699f2ec16ccSHideki Saito           !(CI->getCalledFunction() && TLI &&
70066c120f0SFrancesco Petrogalli             (!VFDatabase::getMappings(*CI).empty() ||
70166c120f0SFrancesco Petrogalli              isTLIScalarize(*TLI, *CI)))) {
7027d65fe5cSSanjay Patel         // If the call is a recognized math libary call, it is likely that
7037d65fe5cSSanjay Patel         // we can vectorize it given loosened floating-point constraints.
7047d65fe5cSSanjay Patel         LibFunc Func;
7057d65fe5cSSanjay Patel         bool IsMathLibCall =
7067d65fe5cSSanjay Patel             TLI && CI->getCalledFunction() &&
7077d65fe5cSSanjay Patel             CI->getType()->isFloatingPointTy() &&
7087d65fe5cSSanjay Patel             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
7097d65fe5cSSanjay Patel             TLI->hasOptimizedCodeGen(Func);
7107d65fe5cSSanjay Patel 
7117d65fe5cSSanjay Patel         if (IsMathLibCall) {
7127d65fe5cSSanjay Patel           // TODO: Ideally, we should not use clang-specific language here,
7137d65fe5cSSanjay Patel           // but it's hard to provide meaningful yet generic advice.
7147d65fe5cSSanjay Patel           // Also, should this be guarded by allowExtraAnalysis() and/or be part
7157d65fe5cSSanjay Patel           // of the returned info from isFunctionVectorizable()?
71666c120f0SFrancesco Petrogalli           reportVectorizationFailure(
71766c120f0SFrancesco Petrogalli               "Found a non-intrinsic callsite",
7189e97caf5SRenato Golin               "library call cannot be vectorized. "
7197d65fe5cSSanjay Patel               "Try compiling with -fno-math-errno, -ffast-math, "
7209e97caf5SRenato Golin               "or similar flags",
721ec818d7fSHideki Saito               "CantVectorizeLibcall", ORE, TheLoop, CI);
7227d65fe5cSSanjay Patel         } else {
7239e97caf5SRenato Golin           reportVectorizationFailure("Found a non-intrinsic callsite",
7249e97caf5SRenato Golin                                      "call instruction cannot be vectorized",
725ec818d7fSHideki Saito                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
7267d65fe5cSSanjay Patel         }
727f2ec16ccSHideki Saito         return false;
728f2ec16ccSHideki Saito       }
729f2ec16ccSHideki Saito 
730a066f1f9SSimon Pilgrim       // Some intrinsics have scalar arguments and should be same in order for
731a066f1f9SSimon Pilgrim       // them to be vectorized (i.e. loop invariant).
732a066f1f9SSimon Pilgrim       if (CI) {
733f2ec16ccSHideki Saito         auto *SE = PSE.getSE();
734a066f1f9SSimon Pilgrim         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
735a066f1f9SSimon Pilgrim         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
736a066f1f9SSimon Pilgrim           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
737a066f1f9SSimon Pilgrim             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
7389e97caf5SRenato Golin               reportVectorizationFailure("Found unvectorizable intrinsic",
7399e97caf5SRenato Golin                   "intrinsic instruction cannot be vectorized",
740ec818d7fSHideki Saito                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
741f2ec16ccSHideki Saito               return false;
742f2ec16ccSHideki Saito             }
743f2ec16ccSHideki Saito           }
744a066f1f9SSimon Pilgrim       }
745f2ec16ccSHideki Saito 
746f2ec16ccSHideki Saito       // Check that the instruction return type is vectorizable.
747f2ec16ccSHideki Saito       // Also, we can't vectorize extractelement instructions.
748f2ec16ccSHideki Saito       if ((!VectorType::isValidElementType(I.getType()) &&
749f2ec16ccSHideki Saito            !I.getType()->isVoidTy()) ||
750f2ec16ccSHideki Saito           isa<ExtractElementInst>(I)) {
7519e97caf5SRenato Golin         reportVectorizationFailure("Found unvectorizable type",
7529e97caf5SRenato Golin             "instruction return type cannot be vectorized",
753ec818d7fSHideki Saito             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
754f2ec16ccSHideki Saito         return false;
755f2ec16ccSHideki Saito       }
756f2ec16ccSHideki Saito 
757f2ec16ccSHideki Saito       // Check that the stored type is vectorizable.
758f2ec16ccSHideki Saito       if (auto *ST = dyn_cast<StoreInst>(&I)) {
759f2ec16ccSHideki Saito         Type *T = ST->getValueOperand()->getType();
760f2ec16ccSHideki Saito         if (!VectorType::isValidElementType(T)) {
7619e97caf5SRenato Golin           reportVectorizationFailure("Store instruction cannot be vectorized",
7629e97caf5SRenato Golin                                      "store instruction cannot be vectorized",
763ec818d7fSHideki Saito                                      "CantVectorizeStore", ORE, TheLoop, ST);
764f2ec16ccSHideki Saito           return false;
765f2ec16ccSHideki Saito         }
766f2ec16ccSHideki Saito 
7676452bdd2SWarren Ristow         // For nontemporal stores, check that a nontemporal vector version is
7686452bdd2SWarren Ristow         // supported on the target.
7696452bdd2SWarren Ristow         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
7706452bdd2SWarren Ristow           // Arbitrarily try a vector of 2 elements.
771d2befc66SChristopher Tetreault           auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2);
7726452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of stored type");
77352e98f62SNikita Popov           if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
7746452bdd2SWarren Ristow             reportVectorizationFailure(
7756452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
7766452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
777ec818d7fSHideki Saito                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
7786452bdd2SWarren Ristow             return false;
7796452bdd2SWarren Ristow           }
7806452bdd2SWarren Ristow         }
7816452bdd2SWarren Ristow 
7826452bdd2SWarren Ristow       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
7836452bdd2SWarren Ristow         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
7846452bdd2SWarren Ristow           // For nontemporal loads, check that a nontemporal vector version is
7856452bdd2SWarren Ristow           // supported on the target (arbitrarily try a vector of 2 elements).
786d2befc66SChristopher Tetreault           auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2);
7876452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of load type");
78852e98f62SNikita Popov           if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
7896452bdd2SWarren Ristow             reportVectorizationFailure(
7906452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
7916452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
792ec818d7fSHideki Saito                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
7936452bdd2SWarren Ristow             return false;
7946452bdd2SWarren Ristow           }
7956452bdd2SWarren Ristow         }
7966452bdd2SWarren Ristow 
797f2ec16ccSHideki Saito         // FP instructions can allow unsafe algebra, thus vectorizable by
798f2ec16ccSHideki Saito         // non-IEEE-754 compliant SIMD units.
799f2ec16ccSHideki Saito         // This applies to floating-point math operations and calls, not memory
800f2ec16ccSHideki Saito         // operations, shuffles, or casts, as they don't change precision or
801f2ec16ccSHideki Saito         // semantics.
802f2ec16ccSHideki Saito       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
803f2ec16ccSHideki Saito                  !I.isFast()) {
804d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
805f2ec16ccSHideki Saito         Hints->setPotentiallyUnsafe();
806f2ec16ccSHideki Saito       }
807f2ec16ccSHideki Saito 
808f2ec16ccSHideki Saito       // Reduction instructions are allowed to have exit users.
809f2ec16ccSHideki Saito       // All other instructions must not have external users.
810f2ec16ccSHideki Saito       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
811b02b0ad8SAnna Thomas         // We can safely vectorize loops where instructions within the loop are
812b02b0ad8SAnna Thomas         // used outside the loop only if the SCEV predicates within the loop is
813b02b0ad8SAnna Thomas         // same as outside the loop. Allowing the exit means reusing the SCEV
814b02b0ad8SAnna Thomas         // outside the loop.
815b02b0ad8SAnna Thomas         if (PSE.getUnionPredicate().isAlwaysTrue()) {
816b02b0ad8SAnna Thomas           AllowedExit.insert(&I);
817b02b0ad8SAnna Thomas           continue;
818b02b0ad8SAnna Thomas         }
8199e97caf5SRenato Golin         reportVectorizationFailure("Value cannot be used outside the loop",
8209e97caf5SRenato Golin                                    "value cannot be used outside the loop",
821ec818d7fSHideki Saito                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
822f2ec16ccSHideki Saito         return false;
823f2ec16ccSHideki Saito       }
824f2ec16ccSHideki Saito     } // next instr.
825f2ec16ccSHideki Saito   }
826f2ec16ccSHideki Saito 
827f2ec16ccSHideki Saito   if (!PrimaryInduction) {
828f2ec16ccSHideki Saito     if (Inductions.empty()) {
8299e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8309e97caf5SRenato Golin           "loop induction variable could not be identified",
831ec818d7fSHideki Saito           "NoInductionVariable", ORE, TheLoop);
832f2ec16ccSHideki Saito       return false;
8334f27730eSWarren Ristow     } else if (!WidestIndTy) {
8349e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8359e97caf5SRenato Golin           "integer loop induction variable could not be identified",
836ec818d7fSHideki Saito           "NoIntegerInductionVariable", ORE, TheLoop);
8374f27730eSWarren Ristow       return false;
8389e97caf5SRenato Golin     } else {
8399e97caf5SRenato Golin       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
840f2ec16ccSHideki Saito     }
841f2ec16ccSHideki Saito   }
842f2ec16ccSHideki Saito 
8439d24933fSFlorian Hahn   // For first order recurrences, we use the previous value (incoming value from
8449d24933fSFlorian Hahn   // the latch) to check if it dominates all users of the recurrence. Bail out
8459d24933fSFlorian Hahn   // if we have to sink such an instruction for another recurrence, as the
8469d24933fSFlorian Hahn   // dominance requirement may not hold after sinking.
8479d24933fSFlorian Hahn   BasicBlock *LoopLatch = TheLoop->getLoopLatch();
8489d24933fSFlorian Hahn   if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
8499d24933fSFlorian Hahn         Instruction *V =
8509d24933fSFlorian Hahn             cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
8519d24933fSFlorian Hahn         return SinkAfter.find(V) != SinkAfter.end();
8529d24933fSFlorian Hahn       }))
8539d24933fSFlorian Hahn     return false;
8549d24933fSFlorian Hahn 
855f2ec16ccSHideki Saito   // Now we know the widest induction type, check if our found induction
856f2ec16ccSHideki Saito   // is the same size. If it's not, unset it here and InnerLoopVectorizer
857f2ec16ccSHideki Saito   // will create another.
858f2ec16ccSHideki Saito   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
859f2ec16ccSHideki Saito     PrimaryInduction = nullptr;
860f2ec16ccSHideki Saito 
861f2ec16ccSHideki Saito   return true;
862f2ec16ccSHideki Saito }
863f2ec16ccSHideki Saito 
864f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeMemory() {
865f2ec16ccSHideki Saito   LAI = &(*GetLAA)(*TheLoop);
866f2ec16ccSHideki Saito   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
867f2ec16ccSHideki Saito   if (LAR) {
868f2ec16ccSHideki Saito     ORE->emit([&]() {
869f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
870f2ec16ccSHideki Saito                                         "loop not vectorized: ", *LAR);
871f2ec16ccSHideki Saito     });
872f2ec16ccSHideki Saito   }
873f2ec16ccSHideki Saito   if (!LAI->canVectorizeMemory())
874f2ec16ccSHideki Saito     return false;
875f2ec16ccSHideki Saito 
8765e9215f0SAnna Thomas   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
8779e97caf5SRenato Golin     reportVectorizationFailure("Stores to a uniform address",
8789e97caf5SRenato Golin         "write to a loop invariant address could not be vectorized",
879ec818d7fSHideki Saito         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
880f2ec16ccSHideki Saito     return false;
881f2ec16ccSHideki Saito   }
882f2ec16ccSHideki Saito   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
883f2ec16ccSHideki Saito   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
884f2ec16ccSHideki Saito 
885f2ec16ccSHideki Saito   return true;
886f2ec16ccSHideki Saito }
887f2ec16ccSHideki Saito 
888f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
889f2ec16ccSHideki Saito   Value *In0 = const_cast<Value *>(V);
890f2ec16ccSHideki Saito   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
891f2ec16ccSHideki Saito   if (!PN)
892f2ec16ccSHideki Saito     return false;
893f2ec16ccSHideki Saito 
894f2ec16ccSHideki Saito   return Inductions.count(PN);
895f2ec16ccSHideki Saito }
896f2ec16ccSHideki Saito 
897f2ec16ccSHideki Saito bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
898f2ec16ccSHideki Saito   auto *Inst = dyn_cast<Instruction>(V);
899f2ec16ccSHideki Saito   return (Inst && InductionCastsToIgnore.count(Inst));
900f2ec16ccSHideki Saito }
901f2ec16ccSHideki Saito 
902f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
903f2ec16ccSHideki Saito   return isInductionPhi(V) || isCastedInductionVariable(V);
904f2ec16ccSHideki Saito }
905f2ec16ccSHideki Saito 
906f2ec16ccSHideki Saito bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
907f2ec16ccSHideki Saito   return FirstOrderRecurrences.count(Phi);
908f2ec16ccSHideki Saito }
909f2ec16ccSHideki Saito 
910f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
911f2ec16ccSHideki Saito   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
912f2ec16ccSHideki Saito }
913f2ec16ccSHideki Saito 
914f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockCanBePredicated(
915d57d73daSDorit Nuzman     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
916f2ec16ccSHideki Saito   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
917f2ec16ccSHideki Saito 
918f2ec16ccSHideki Saito   for (Instruction &I : *BB) {
919f2ec16ccSHideki Saito     // Check that we don't have a constant expression that can trap as operand.
920f2ec16ccSHideki Saito     for (Value *Operand : I.operands()) {
921f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(Operand))
922f2ec16ccSHideki Saito         if (C->canTrap())
923f2ec16ccSHideki Saito           return false;
924f2ec16ccSHideki Saito     }
92523c11380SFlorian Hahn 
92623c11380SFlorian Hahn     // We can predicate blocks with calls to assume, as long as we drop them in
92723c11380SFlorian Hahn     // case we flatten the CFG via predication.
92823c11380SFlorian Hahn     if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
92923c11380SFlorian Hahn       ConditionalAssumes.insert(&I);
93023c11380SFlorian Hahn       continue;
93123c11380SFlorian Hahn     }
93223c11380SFlorian Hahn 
933f2ec16ccSHideki Saito     // We might be able to hoist the load.
934f2ec16ccSHideki Saito     if (I.mayReadFromMemory()) {
935f2ec16ccSHideki Saito       auto *LI = dyn_cast<LoadInst>(&I);
936f2ec16ccSHideki Saito       if (!LI)
937f2ec16ccSHideki Saito         return false;
938f2ec16ccSHideki Saito       if (!SafePtrs.count(LI->getPointerOperand())) {
939f2ec16ccSHideki Saito         // !llvm.mem.parallel_loop_access implies if-conversion safety.
940f2ec16ccSHideki Saito         // Otherwise, record that the load needs (real or emulated) masking
941f2ec16ccSHideki Saito         // and let the cost model decide.
942d57d73daSDorit Nuzman         if (!IsAnnotatedParallel || PreserveGuards)
943f2ec16ccSHideki Saito           MaskedOp.insert(LI);
944f2ec16ccSHideki Saito         continue;
945f2ec16ccSHideki Saito       }
946f2ec16ccSHideki Saito     }
947f2ec16ccSHideki Saito 
948f2ec16ccSHideki Saito     if (I.mayWriteToMemory()) {
949f2ec16ccSHideki Saito       auto *SI = dyn_cast<StoreInst>(&I);
950f2ec16ccSHideki Saito       if (!SI)
951f2ec16ccSHideki Saito         return false;
952f2ec16ccSHideki Saito       // Predicated store requires some form of masking:
953f2ec16ccSHideki Saito       // 1) masked store HW instruction,
954f2ec16ccSHideki Saito       // 2) emulation via load-blend-store (only if safe and legal to do so,
955f2ec16ccSHideki Saito       //    be aware on the race conditions), or
956f2ec16ccSHideki Saito       // 3) element-by-element predicate check and scalar store.
957f2ec16ccSHideki Saito       MaskedOp.insert(SI);
958f2ec16ccSHideki Saito       continue;
959f2ec16ccSHideki Saito     }
960f2ec16ccSHideki Saito     if (I.mayThrow())
961f2ec16ccSHideki Saito       return false;
962f2ec16ccSHideki Saito   }
963f2ec16ccSHideki Saito 
964f2ec16ccSHideki Saito   return true;
965f2ec16ccSHideki Saito }
966f2ec16ccSHideki Saito 
967f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
968f2ec16ccSHideki Saito   if (!EnableIfConversion) {
9699e97caf5SRenato Golin     reportVectorizationFailure("If-conversion is disabled",
9709e97caf5SRenato Golin                                "if-conversion is disabled",
971ec818d7fSHideki Saito                                "IfConversionDisabled",
972ec818d7fSHideki Saito                                ORE, TheLoop);
973f2ec16ccSHideki Saito     return false;
974f2ec16ccSHideki Saito   }
975f2ec16ccSHideki Saito 
976f2ec16ccSHideki Saito   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
977f2ec16ccSHideki Saito 
978cf3b5559SPhilip Reames   // A list of pointers which are known to be dereferenceable within scope of
979cf3b5559SPhilip Reames   // the loop body for each iteration of the loop which executes.  That is,
980cf3b5559SPhilip Reames   // the memory pointed to can be dereferenced (with the access size implied by
981cf3b5559SPhilip Reames   // the value's type) unconditionally within the loop header without
982cf3b5559SPhilip Reames   // introducing a new fault.
9833bbc71d6SSjoerd Meijer   SmallPtrSet<Value *, 8> SafePointers;
984f2ec16ccSHideki Saito 
985f2ec16ccSHideki Saito   // Collect safe addresses.
986f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
9877403569bSPhilip Reames     if (!blockNeedsPredication(BB)) {
988f2ec16ccSHideki Saito       for (Instruction &I : *BB)
989f2ec16ccSHideki Saito         if (auto *Ptr = getLoadStorePointerOperand(&I))
9903bbc71d6SSjoerd Meijer           SafePointers.insert(Ptr);
9917403569bSPhilip Reames       continue;
9927403569bSPhilip Reames     }
9937403569bSPhilip Reames 
9947403569bSPhilip Reames     // For a block which requires predication, a address may be safe to access
9957403569bSPhilip Reames     // in the loop w/o predication if we can prove dereferenceability facts
9967403569bSPhilip Reames     // sufficient to ensure it'll never fault within the loop. For the moment,
9977403569bSPhilip Reames     // we restrict this to loads; stores are more complicated due to
9987403569bSPhilip Reames     // concurrency restrictions.
9997403569bSPhilip Reames     ScalarEvolution &SE = *PSE.getSE();
10007403569bSPhilip Reames     for (Instruction &I : *BB) {
10017403569bSPhilip Reames       LoadInst *LI = dyn_cast<LoadInst>(&I);
10027403569bSPhilip Reames       if (LI && !mustSuppressSpeculation(*LI) &&
10037403569bSPhilip Reames           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
10043bbc71d6SSjoerd Meijer         SafePointers.insert(LI->getPointerOperand());
10057403569bSPhilip Reames     }
1006f2ec16ccSHideki Saito   }
1007f2ec16ccSHideki Saito 
1008f2ec16ccSHideki Saito   // Collect the blocks that need predication.
1009f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
1010f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
1011f2ec16ccSHideki Saito     // We don't support switch statements inside loops.
1012f2ec16ccSHideki Saito     if (!isa<BranchInst>(BB->getTerminator())) {
10139e97caf5SRenato Golin       reportVectorizationFailure("Loop contains a switch statement",
10149e97caf5SRenato Golin                                  "loop contains a switch statement",
1015ec818d7fSHideki Saito                                  "LoopContainsSwitch", ORE, TheLoop,
1016ec818d7fSHideki Saito                                  BB->getTerminator());
1017f2ec16ccSHideki Saito       return false;
1018f2ec16ccSHideki Saito     }
1019f2ec16ccSHideki Saito 
1020f2ec16ccSHideki Saito     // We must be able to predicate all blocks that need to be predicated.
1021f2ec16ccSHideki Saito     if (blockNeedsPredication(BB)) {
10223bbc71d6SSjoerd Meijer       if (!blockCanBePredicated(BB, SafePointers)) {
10239e97caf5SRenato Golin         reportVectorizationFailure(
10249e97caf5SRenato Golin             "Control flow cannot be substituted for a select",
10259e97caf5SRenato Golin             "control flow cannot be substituted for a select",
1026ec818d7fSHideki Saito             "NoCFGForSelect", ORE, TheLoop,
1027ec818d7fSHideki Saito             BB->getTerminator());
1028f2ec16ccSHideki Saito         return false;
1029f2ec16ccSHideki Saito       }
1030f2ec16ccSHideki Saito     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
10319e97caf5SRenato Golin       reportVectorizationFailure(
10329e97caf5SRenato Golin           "Control flow cannot be substituted for a select",
10339e97caf5SRenato Golin           "control flow cannot be substituted for a select",
1034ec818d7fSHideki Saito           "NoCFGForSelect", ORE, TheLoop,
1035ec818d7fSHideki Saito           BB->getTerminator());
1036f2ec16ccSHideki Saito       return false;
1037f2ec16ccSHideki Saito     }
1038f2ec16ccSHideki Saito   }
1039f2ec16ccSHideki Saito 
1040f2ec16ccSHideki Saito   // We can if-convert this loop.
1041f2ec16ccSHideki Saito   return true;
1042f2ec16ccSHideki Saito }
1043f2ec16ccSHideki Saito 
1044f2ec16ccSHideki Saito // Helper function to canVectorizeLoopNestCFG.
1045f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1046f2ec16ccSHideki Saito                                                     bool UseVPlanNativePath) {
1047f2ec16ccSHideki Saito   assert((UseVPlanNativePath || Lp->empty()) &&
1048f2ec16ccSHideki Saito          "VPlan-native path is not enabled.");
1049f2ec16ccSHideki Saito 
1050f2ec16ccSHideki Saito   // TODO: ORE should be improved to show more accurate information when an
1051f2ec16ccSHideki Saito   // outer loop can't be vectorized because a nested loop is not understood or
1052f2ec16ccSHideki Saito   // legal. Something like: "outer_loop_location: loop not vectorized:
1053f2ec16ccSHideki Saito   // (inner_loop_location) loop control flow is not understood by vectorizer".
1054f2ec16ccSHideki Saito 
1055f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1056f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1057f2ec16ccSHideki Saito   bool Result = true;
1058f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1059f2ec16ccSHideki Saito 
1060f2ec16ccSHideki Saito   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1061f2ec16ccSHideki Saito   // be canonicalized.
1062f2ec16ccSHideki Saito   if (!Lp->getLoopPreheader()) {
10639e97caf5SRenato Golin     reportVectorizationFailure("Loop doesn't have a legal pre-header",
10649e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1065ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1066f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1067f2ec16ccSHideki Saito       Result = false;
1068f2ec16ccSHideki Saito     else
1069f2ec16ccSHideki Saito       return false;
1070f2ec16ccSHideki Saito   }
1071f2ec16ccSHideki Saito 
1072f2ec16ccSHideki Saito   // We must have a single backedge.
1073f2ec16ccSHideki Saito   if (Lp->getNumBackEdges() != 1) {
10749e97caf5SRenato Golin     reportVectorizationFailure("The loop must have a single backedge",
10759e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1076ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1077f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1078f2ec16ccSHideki Saito       Result = false;
1079f2ec16ccSHideki Saito     else
1080f2ec16ccSHideki Saito       return false;
1081f2ec16ccSHideki Saito   }
1082f2ec16ccSHideki Saito 
1083f2ec16ccSHideki Saito   // We must have a single exiting block.
1084f2ec16ccSHideki Saito   if (!Lp->getExitingBlock()) {
10859e97caf5SRenato Golin     reportVectorizationFailure("The loop must have an exiting block",
10869e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1087ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1088f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1089f2ec16ccSHideki Saito       Result = false;
1090f2ec16ccSHideki Saito     else
1091f2ec16ccSHideki Saito       return false;
1092f2ec16ccSHideki Saito   }
1093f2ec16ccSHideki Saito 
1094f2ec16ccSHideki Saito   // We only handle bottom-tested loops, i.e. loop in which the condition is
1095f2ec16ccSHideki Saito   // checked at the end of each iteration. With that we can assume that all
1096f2ec16ccSHideki Saito   // instructions in the loop are executed the same number of times.
1097f2ec16ccSHideki Saito   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
10989e97caf5SRenato Golin     reportVectorizationFailure("The exiting block is not the loop latch",
10999e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1100ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1101f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1102f2ec16ccSHideki Saito       Result = false;
1103f2ec16ccSHideki Saito     else
1104f2ec16ccSHideki Saito       return false;
1105f2ec16ccSHideki Saito   }
1106f2ec16ccSHideki Saito 
1107f2ec16ccSHideki Saito   return Result;
1108f2ec16ccSHideki Saito }
1109f2ec16ccSHideki Saito 
1110f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1111f2ec16ccSHideki Saito     Loop *Lp, bool UseVPlanNativePath) {
1112f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1113f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1114f2ec16ccSHideki Saito   bool Result = true;
1115f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1116f2ec16ccSHideki Saito   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1117f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1118f2ec16ccSHideki Saito       Result = false;
1119f2ec16ccSHideki Saito     else
1120f2ec16ccSHideki Saito       return false;
1121f2ec16ccSHideki Saito   }
1122f2ec16ccSHideki Saito 
1123f2ec16ccSHideki Saito   // Recursively check whether the loop control flow of nested loops is
1124f2ec16ccSHideki Saito   // understood.
1125f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
1126f2ec16ccSHideki Saito     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1127f2ec16ccSHideki Saito       if (DoExtraAnalysis)
1128f2ec16ccSHideki Saito         Result = false;
1129f2ec16ccSHideki Saito       else
1130f2ec16ccSHideki Saito         return false;
1131f2ec16ccSHideki Saito     }
1132f2ec16ccSHideki Saito 
1133f2ec16ccSHideki Saito   return Result;
1134f2ec16ccSHideki Saito }
1135f2ec16ccSHideki Saito 
1136f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1137f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1138f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1139f2ec16ccSHideki Saito   bool Result = true;
1140f2ec16ccSHideki Saito 
1141f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1142f2ec16ccSHideki Saito   // Check whether the loop-related control flow in the loop nest is expected by
1143f2ec16ccSHideki Saito   // vectorizer.
1144f2ec16ccSHideki Saito   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1145f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1146f2ec16ccSHideki Saito       Result = false;
1147f2ec16ccSHideki Saito     else
1148f2ec16ccSHideki Saito       return false;
1149f2ec16ccSHideki Saito   }
1150f2ec16ccSHideki Saito 
1151f2ec16ccSHideki Saito   // We need to have a loop header.
1152d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1153f2ec16ccSHideki Saito                     << '\n');
1154f2ec16ccSHideki Saito 
1155f2ec16ccSHideki Saito   // Specific checks for outer loops. We skip the remaining legal checks at this
1156f2ec16ccSHideki Saito   // point because they don't support outer loops.
1157f2ec16ccSHideki Saito   if (!TheLoop->empty()) {
1158f2ec16ccSHideki Saito     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1159f2ec16ccSHideki Saito 
1160f2ec16ccSHideki Saito     if (!canVectorizeOuterLoop()) {
11619e97caf5SRenato Golin       reportVectorizationFailure("Unsupported outer loop",
11629e97caf5SRenato Golin                                  "unsupported outer loop",
1163ec818d7fSHideki Saito                                  "UnsupportedOuterLoop",
1164ec818d7fSHideki Saito                                  ORE, TheLoop);
1165f2ec16ccSHideki Saito       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1166f2ec16ccSHideki Saito       // outer loops.
1167f2ec16ccSHideki Saito       return false;
1168f2ec16ccSHideki Saito     }
1169f2ec16ccSHideki Saito 
1170d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1171f2ec16ccSHideki Saito     return Result;
1172f2ec16ccSHideki Saito   }
1173f2ec16ccSHideki Saito 
1174f2ec16ccSHideki Saito   assert(TheLoop->empty() && "Inner loop expected.");
1175f2ec16ccSHideki Saito   // Check if we can if-convert non-single-bb loops.
1176f2ec16ccSHideki Saito   unsigned NumBlocks = TheLoop->getNumBlocks();
1177f2ec16ccSHideki Saito   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1178d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1179f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1180f2ec16ccSHideki Saito       Result = false;
1181f2ec16ccSHideki Saito     else
1182f2ec16ccSHideki Saito       return false;
1183f2ec16ccSHideki Saito   }
1184f2ec16ccSHideki Saito 
1185f2ec16ccSHideki Saito   // Check if we can vectorize the instructions and CFG in this loop.
1186f2ec16ccSHideki Saito   if (!canVectorizeInstrs()) {
1187d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1188f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1189f2ec16ccSHideki Saito       Result = false;
1190f2ec16ccSHideki Saito     else
1191f2ec16ccSHideki Saito       return false;
1192f2ec16ccSHideki Saito   }
1193f2ec16ccSHideki Saito 
1194f2ec16ccSHideki Saito   // Go over each instruction and look at memory deps.
1195f2ec16ccSHideki Saito   if (!canVectorizeMemory()) {
1196d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1197f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1198f2ec16ccSHideki Saito       Result = false;
1199f2ec16ccSHideki Saito     else
1200f2ec16ccSHideki Saito       return false;
1201f2ec16ccSHideki Saito   }
1202f2ec16ccSHideki Saito 
1203d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1204f2ec16ccSHideki Saito                     << (LAI->getRuntimePointerChecking()->Need
1205f2ec16ccSHideki Saito                             ? " (with a runtime bound check)"
1206f2ec16ccSHideki Saito                             : "")
1207f2ec16ccSHideki Saito                     << "!\n");
1208f2ec16ccSHideki Saito 
1209f2ec16ccSHideki Saito   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1210f2ec16ccSHideki Saito   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1211f2ec16ccSHideki Saito     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1212f2ec16ccSHideki Saito 
1213f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
12149e97caf5SRenato Golin     reportVectorizationFailure("Too many SCEV checks needed",
12159e97caf5SRenato Golin         "Too many SCEV assumptions need to be made and checked at runtime",
1216ec818d7fSHideki Saito         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1217f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1218f2ec16ccSHideki Saito       Result = false;
1219f2ec16ccSHideki Saito     else
1220f2ec16ccSHideki Saito       return false;
1221f2ec16ccSHideki Saito   }
1222f2ec16ccSHideki Saito 
1223f2ec16ccSHideki Saito   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1224f2ec16ccSHideki Saito   // we can vectorize, and at this point we don't have any other mem analysis
1225f2ec16ccSHideki Saito   // which may limit our maximum vectorization factor, so just return true with
1226f2ec16ccSHideki Saito   // no restrictions.
1227f2ec16ccSHideki Saito   return Result;
1228f2ec16ccSHideki Saito }
1229f2ec16ccSHideki Saito 
1230d57d73daSDorit Nuzman bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1231b0b5312eSAyal Zaks 
1232b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1233b0b5312eSAyal Zaks 
1234d15df0edSAyal Zaks   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1235b0b5312eSAyal Zaks 
1236d0d38df0SDavid Green   for (auto &Reduction : getReductionVars())
1237d15df0edSAyal Zaks     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1238d15df0edSAyal Zaks 
1239d15df0edSAyal Zaks   // TODO: handle non-reduction outside users when tail is folded by masking.
1240b0b5312eSAyal Zaks   for (auto *AE : AllowedExit) {
1241d15df0edSAyal Zaks     // Check that all users of allowed exit values are inside the loop or
1242d15df0edSAyal Zaks     // are the live-out of a reduction.
1243d15df0edSAyal Zaks     if (ReductionLiveOuts.count(AE))
1244d15df0edSAyal Zaks       continue;
1245b0b5312eSAyal Zaks     for (User *U : AE->users()) {
1246b0b5312eSAyal Zaks       Instruction *UI = cast<Instruction>(U);
1247b0b5312eSAyal Zaks       if (TheLoop->contains(UI))
1248b0b5312eSAyal Zaks         continue;
12499e97caf5SRenato Golin       reportVectorizationFailure(
12509e97caf5SRenato Golin           "Cannot fold tail by masking, loop has an outside user for",
12519e97caf5SRenato Golin           "Cannot fold tail by masking in the presence of live outs.",
1252ec818d7fSHideki Saito           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1253b0b5312eSAyal Zaks       return false;
1254b0b5312eSAyal Zaks     }
1255b0b5312eSAyal Zaks   }
1256b0b5312eSAyal Zaks 
1257b0b5312eSAyal Zaks   // The list of pointers that we can safely read and write to remains empty.
1258b0b5312eSAyal Zaks   SmallPtrSet<Value *, 8> SafePointers;
1259b0b5312eSAyal Zaks 
1260b0b5312eSAyal Zaks   // Check and mark all blocks for predication, including those that ordinarily
1261b0b5312eSAyal Zaks   // do not need predication such as the header block.
1262b0b5312eSAyal Zaks   for (BasicBlock *BB : TheLoop->blocks()) {
1263d57d73daSDorit Nuzman     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
12649e97caf5SRenato Golin       reportVectorizationFailure(
12659e97caf5SRenato Golin           "Cannot fold tail by masking as required",
12669e97caf5SRenato Golin           "control flow cannot be substituted for a select",
1267ec818d7fSHideki Saito           "NoCFGForSelect", ORE, TheLoop,
1268ec818d7fSHideki Saito           BB->getTerminator());
1269b0b5312eSAyal Zaks       return false;
1270b0b5312eSAyal Zaks     }
1271b0b5312eSAyal Zaks   }
1272b0b5312eSAyal Zaks 
1273b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1274b0b5312eSAyal Zaks   return true;
1275b0b5312eSAyal Zaks }
1276b0b5312eSAyal Zaks 
1277f2ec16ccSHideki Saito } // namespace llvm
1278