1f2ec16ccSHideki Saito //===- LoopVectorizationLegality.cpp --------------------------------------===//
2f2ec16ccSHideki Saito //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6f2ec16ccSHideki Saito //
7f2ec16ccSHideki Saito //===----------------------------------------------------------------------===//
8f2ec16ccSHideki Saito //
9f2ec16ccSHideki Saito // This file provides loop vectorization legality analysis. Original code
10f2ec16ccSHideki Saito // resided in LoopVectorize.cpp for a long time.
11f2ec16ccSHideki Saito //
12f2ec16ccSHideki Saito // At this point, it is implemented as a utility class, not as an analysis
13f2ec16ccSHideki Saito // pass. It should be easy to create an analysis pass around it if there
14f2ec16ccSHideki Saito // is a need (but D45420 needs to happen first).
15f2ec16ccSHideki Saito //
16f2ec16ccSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
177403569bSPhilip Reames #include "llvm/Analysis/Loads.h"
18a5f1f9c9SSimon Pilgrim #include "llvm/Analysis/LoopInfo.h"
197403569bSPhilip Reames #include "llvm/Analysis/ValueTracking.h"
20f2ec16ccSHideki Saito #include "llvm/Analysis/VectorUtils.h"
21f2ec16ccSHideki Saito #include "llvm/IR/IntrinsicInst.h"
2223c11380SFlorian Hahn #include "llvm/IR/PatternMatch.h"
23*7bedae7dSHiroshi Yamauchi #include "llvm/Transforms/Utils/SizeOpts.h"
2423c11380SFlorian Hahn #include "llvm/Transforms/Vectorize/LoopVectorize.h"
25f2ec16ccSHideki Saito 
26f2ec16ccSHideki Saito using namespace llvm;
2723c11380SFlorian Hahn using namespace PatternMatch;
28f2ec16ccSHideki Saito 
29f2ec16ccSHideki Saito #define LV_NAME "loop-vectorize"
30f2ec16ccSHideki Saito #define DEBUG_TYPE LV_NAME
31f2ec16ccSHideki Saito 
324e4ecae0SHideki Saito extern cl::opt<bool> EnableVPlanPredication;
334e4ecae0SHideki Saito 
34f2ec16ccSHideki Saito static cl::opt<bool>
35f2ec16ccSHideki Saito     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
36f2ec16ccSHideki Saito                        cl::desc("Enable if-conversion during vectorization."));
37f2ec16ccSHideki Saito 
38f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
39f2ec16ccSHideki Saito     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
40f2ec16ccSHideki Saito     cl::desc("The maximum allowed number of runtime memory checks with a "
41f2ec16ccSHideki Saito              "vectorize(enable) pragma."));
42f2ec16ccSHideki Saito 
43f2ec16ccSHideki Saito static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
44f2ec16ccSHideki Saito     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
45f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed."));
46f2ec16ccSHideki Saito 
47f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
48f2ec16ccSHideki Saito     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
49f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed with a "
50f2ec16ccSHideki Saito              "vectorize(enable) pragma"));
51f2ec16ccSHideki Saito 
52f2ec16ccSHideki Saito /// Maximum vectorization interleave count.
53f2ec16ccSHideki Saito static const unsigned MaxInterleaveFactor = 16;
54f2ec16ccSHideki Saito 
55f2ec16ccSHideki Saito namespace llvm {
56f2ec16ccSHideki Saito 
57f2ec16ccSHideki Saito bool LoopVectorizeHints::Hint::validate(unsigned Val) {
58f2ec16ccSHideki Saito   switch (Kind) {
59f2ec16ccSHideki Saito   case HK_WIDTH:
60f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
61f2ec16ccSHideki Saito   case HK_UNROLL:
62f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
63f2ec16ccSHideki Saito   case HK_FORCE:
64f2ec16ccSHideki Saito     return (Val <= 1);
65f2ec16ccSHideki Saito   case HK_ISVECTORIZED:
6620b198ecSSjoerd Meijer   case HK_PREDICATE:
67f2ec16ccSHideki Saito     return (Val == 0 || Val == 1);
68f2ec16ccSHideki Saito   }
69f2ec16ccSHideki Saito   return false;
70f2ec16ccSHideki Saito }
71f2ec16ccSHideki Saito 
72d4eb13c8SMichael Kruse LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
73d4eb13c8SMichael Kruse                                        bool InterleaveOnlyWhenForced,
74f2ec16ccSHideki Saito                                        OptimizationRemarkEmitter &ORE)
75f2ec16ccSHideki Saito     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
76d4eb13c8SMichael Kruse       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
77f2ec16ccSHideki Saito       Force("vectorize.enable", FK_Undefined, HK_FORCE),
7820b198ecSSjoerd Meijer       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
79cb47b878SSjoerd Meijer       Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
8020b198ecSSjoerd Meijer       ORE(ORE) {
81f2ec16ccSHideki Saito   // Populate values with existing loop metadata.
82f2ec16ccSHideki Saito   getHintsFromMetadata();
83f2ec16ccSHideki Saito 
84f2ec16ccSHideki Saito   // force-vector-interleave overrides DisableInterleaving.
85f2ec16ccSHideki Saito   if (VectorizerParams::isInterleaveForced())
86f2ec16ccSHideki Saito     Interleave.Value = VectorizerParams::VectorizationInterleave;
87f2ec16ccSHideki Saito 
88f2ec16ccSHideki Saito   if (IsVectorized.Value != 1)
89f2ec16ccSHideki Saito     // If the vectorization width and interleaving count are both 1 then
90f2ec16ccSHideki Saito     // consider the loop to have been already vectorized because there's
91f2ec16ccSHideki Saito     // nothing more that we can do.
92f2ec16ccSHideki Saito     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
93d4eb13c8SMichael Kruse   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
94f2ec16ccSHideki Saito              << "LV: Interleaving disabled by the pass manager\n");
95f2ec16ccSHideki Saito }
96f2ec16ccSHideki Saito 
9777a614a6SMichael Kruse void LoopVectorizeHints::setAlreadyVectorized() {
9877a614a6SMichael Kruse   LLVMContext &Context = TheLoop->getHeader()->getContext();
9977a614a6SMichael Kruse 
10077a614a6SMichael Kruse   MDNode *IsVectorizedMD = MDNode::get(
10177a614a6SMichael Kruse       Context,
10277a614a6SMichael Kruse       {MDString::get(Context, "llvm.loop.isvectorized"),
10377a614a6SMichael Kruse        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
10477a614a6SMichael Kruse   MDNode *LoopID = TheLoop->getLoopID();
10577a614a6SMichael Kruse   MDNode *NewLoopID =
10677a614a6SMichael Kruse       makePostTransformationMetadata(Context, LoopID,
10777a614a6SMichael Kruse                                      {Twine(Prefix(), "vectorize.").str(),
10877a614a6SMichael Kruse                                       Twine(Prefix(), "interleave.").str()},
10977a614a6SMichael Kruse                                      {IsVectorizedMD});
11077a614a6SMichael Kruse   TheLoop->setLoopID(NewLoopID);
11177a614a6SMichael Kruse 
11277a614a6SMichael Kruse   // Update internal cache.
11377a614a6SMichael Kruse   IsVectorized.Value = 1;
11477a614a6SMichael Kruse }
11577a614a6SMichael Kruse 
116d4eb13c8SMichael Kruse bool LoopVectorizeHints::allowVectorization(
117d4eb13c8SMichael Kruse     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
118f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled) {
119d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
120f2ec16ccSHideki Saito     emitRemarkWithHints();
121f2ec16ccSHideki Saito     return false;
122f2ec16ccSHideki Saito   }
123f2ec16ccSHideki Saito 
124d4eb13c8SMichael Kruse   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
125d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
126f2ec16ccSHideki Saito     emitRemarkWithHints();
127f2ec16ccSHideki Saito     return false;
128f2ec16ccSHideki Saito   }
129f2ec16ccSHideki Saito 
130f2ec16ccSHideki Saito   if (getIsVectorized() == 1) {
131d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
132f2ec16ccSHideki Saito     // FIXME: Add interleave.disable metadata. This will allow
133f2ec16ccSHideki Saito     // vectorize.disable to be used without disabling the pass and errors
134f2ec16ccSHideki Saito     // to differentiate between disabled vectorization and a width of 1.
135f2ec16ccSHideki Saito     ORE.emit([&]() {
136f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
137f2ec16ccSHideki Saito                                         "AllDisabled", L->getStartLoc(),
138f2ec16ccSHideki Saito                                         L->getHeader())
139f2ec16ccSHideki Saito              << "loop not vectorized: vectorization and interleaving are "
140f2ec16ccSHideki Saito                 "explicitly disabled, or the loop has already been "
141f2ec16ccSHideki Saito                 "vectorized";
142f2ec16ccSHideki Saito     });
143f2ec16ccSHideki Saito     return false;
144f2ec16ccSHideki Saito   }
145f2ec16ccSHideki Saito 
146f2ec16ccSHideki Saito   return true;
147f2ec16ccSHideki Saito }
148f2ec16ccSHideki Saito 
149f2ec16ccSHideki Saito void LoopVectorizeHints::emitRemarkWithHints() const {
150f2ec16ccSHideki Saito   using namespace ore;
151f2ec16ccSHideki Saito 
152f2ec16ccSHideki Saito   ORE.emit([&]() {
153f2ec16ccSHideki Saito     if (Force.Value == LoopVectorizeHints::FK_Disabled)
154f2ec16ccSHideki Saito       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
155f2ec16ccSHideki Saito                                       TheLoop->getStartLoc(),
156f2ec16ccSHideki Saito                                       TheLoop->getHeader())
157f2ec16ccSHideki Saito              << "loop not vectorized: vectorization is explicitly disabled";
158f2ec16ccSHideki Saito     else {
159f2ec16ccSHideki Saito       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
160f2ec16ccSHideki Saito                                  TheLoop->getStartLoc(), TheLoop->getHeader());
161f2ec16ccSHideki Saito       R << "loop not vectorized";
162f2ec16ccSHideki Saito       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
163f2ec16ccSHideki Saito         R << " (Force=" << NV("Force", true);
164f2ec16ccSHideki Saito         if (Width.Value != 0)
165f2ec16ccSHideki Saito           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
166f2ec16ccSHideki Saito         if (Interleave.Value != 0)
167f2ec16ccSHideki Saito           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
168f2ec16ccSHideki Saito         R << ")";
169f2ec16ccSHideki Saito       }
170f2ec16ccSHideki Saito       return R;
171f2ec16ccSHideki Saito     }
172f2ec16ccSHideki Saito   });
173f2ec16ccSHideki Saito }
174f2ec16ccSHideki Saito 
175f2ec16ccSHideki Saito const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
176f2ec16ccSHideki Saito   if (getWidth() == 1)
177f2ec16ccSHideki Saito     return LV_NAME;
178f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled)
179f2ec16ccSHideki Saito     return LV_NAME;
180f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
181f2ec16ccSHideki Saito     return LV_NAME;
182f2ec16ccSHideki Saito   return OptimizationRemarkAnalysis::AlwaysPrint;
183f2ec16ccSHideki Saito }
184f2ec16ccSHideki Saito 
185f2ec16ccSHideki Saito void LoopVectorizeHints::getHintsFromMetadata() {
186f2ec16ccSHideki Saito   MDNode *LoopID = TheLoop->getLoopID();
187f2ec16ccSHideki Saito   if (!LoopID)
188f2ec16ccSHideki Saito     return;
189f2ec16ccSHideki Saito 
190f2ec16ccSHideki Saito   // First operand should refer to the loop id itself.
191f2ec16ccSHideki Saito   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
192f2ec16ccSHideki Saito   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
193f2ec16ccSHideki Saito 
194f2ec16ccSHideki Saito   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
195f2ec16ccSHideki Saito     const MDString *S = nullptr;
196f2ec16ccSHideki Saito     SmallVector<Metadata *, 4> Args;
197f2ec16ccSHideki Saito 
198f2ec16ccSHideki Saito     // The expected hint is either a MDString or a MDNode with the first
199f2ec16ccSHideki Saito     // operand a MDString.
200f2ec16ccSHideki Saito     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
201f2ec16ccSHideki Saito       if (!MD || MD->getNumOperands() == 0)
202f2ec16ccSHideki Saito         continue;
203f2ec16ccSHideki Saito       S = dyn_cast<MDString>(MD->getOperand(0));
204f2ec16ccSHideki Saito       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
205f2ec16ccSHideki Saito         Args.push_back(MD->getOperand(i));
206f2ec16ccSHideki Saito     } else {
207f2ec16ccSHideki Saito       S = dyn_cast<MDString>(LoopID->getOperand(i));
208f2ec16ccSHideki Saito       assert(Args.size() == 0 && "too many arguments for MDString");
209f2ec16ccSHideki Saito     }
210f2ec16ccSHideki Saito 
211f2ec16ccSHideki Saito     if (!S)
212f2ec16ccSHideki Saito       continue;
213f2ec16ccSHideki Saito 
214f2ec16ccSHideki Saito     // Check if the hint starts with the loop metadata prefix.
215f2ec16ccSHideki Saito     StringRef Name = S->getString();
216f2ec16ccSHideki Saito     if (Args.size() == 1)
217f2ec16ccSHideki Saito       setHint(Name, Args[0]);
218f2ec16ccSHideki Saito   }
219f2ec16ccSHideki Saito }
220f2ec16ccSHideki Saito 
221f2ec16ccSHideki Saito void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
222f2ec16ccSHideki Saito   if (!Name.startswith(Prefix()))
223f2ec16ccSHideki Saito     return;
224f2ec16ccSHideki Saito   Name = Name.substr(Prefix().size(), StringRef::npos);
225f2ec16ccSHideki Saito 
226f2ec16ccSHideki Saito   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
227f2ec16ccSHideki Saito   if (!C)
228f2ec16ccSHideki Saito     return;
229f2ec16ccSHideki Saito   unsigned Val = C->getZExtValue();
230f2ec16ccSHideki Saito 
23120b198ecSSjoerd Meijer   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
232f2ec16ccSHideki Saito   for (auto H : Hints) {
233f2ec16ccSHideki Saito     if (Name == H->Name) {
234f2ec16ccSHideki Saito       if (H->validate(Val))
235f2ec16ccSHideki Saito         H->Value = Val;
236f2ec16ccSHideki Saito       else
237d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
238f2ec16ccSHideki Saito       break;
239f2ec16ccSHideki Saito     }
240f2ec16ccSHideki Saito   }
241f2ec16ccSHideki Saito }
242f2ec16ccSHideki Saito 
243f2ec16ccSHideki Saito bool LoopVectorizationRequirements::doesNotMeet(
244f2ec16ccSHideki Saito     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
245f2ec16ccSHideki Saito   const char *PassName = Hints.vectorizeAnalysisPassName();
246f2ec16ccSHideki Saito   bool Failed = false;
247f2ec16ccSHideki Saito   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
248f2ec16ccSHideki Saito     ORE.emit([&]() {
249f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisFPCommute(
250f2ec16ccSHideki Saito                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
251f2ec16ccSHideki Saito                  UnsafeAlgebraInst->getParent())
252f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
253f2ec16ccSHideki Saito                 "floating-point operations";
254f2ec16ccSHideki Saito     });
255f2ec16ccSHideki Saito     Failed = true;
256f2ec16ccSHideki Saito   }
257f2ec16ccSHideki Saito 
258f2ec16ccSHideki Saito   // Test if runtime memcheck thresholds are exceeded.
259f2ec16ccSHideki Saito   bool PragmaThresholdReached =
260f2ec16ccSHideki Saito       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
261f2ec16ccSHideki Saito   bool ThresholdReached =
262f2ec16ccSHideki Saito       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
263f2ec16ccSHideki Saito   if ((ThresholdReached && !Hints.allowReordering()) ||
264f2ec16ccSHideki Saito       PragmaThresholdReached) {
265f2ec16ccSHideki Saito     ORE.emit([&]() {
266f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
267f2ec16ccSHideki Saito                                                 L->getStartLoc(),
268f2ec16ccSHideki Saito                                                 L->getHeader())
269f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
270f2ec16ccSHideki Saito                 "memory operations";
271f2ec16ccSHideki Saito     });
272d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
273f2ec16ccSHideki Saito     Failed = true;
274f2ec16ccSHideki Saito   }
275f2ec16ccSHideki Saito 
276f2ec16ccSHideki Saito   return Failed;
277f2ec16ccSHideki Saito }
278f2ec16ccSHideki Saito 
279f2ec16ccSHideki Saito // Return true if the inner loop \p Lp is uniform with regard to the outer loop
280f2ec16ccSHideki Saito // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
281f2ec16ccSHideki Saito // executing the inner loop will execute the same iterations). This check is
282f2ec16ccSHideki Saito // very constrained for now but it will be relaxed in the future. \p Lp is
283f2ec16ccSHideki Saito // considered uniform if it meets all the following conditions:
284f2ec16ccSHideki Saito //   1) it has a canonical IV (starting from 0 and with stride 1),
285f2ec16ccSHideki Saito //   2) its latch terminator is a conditional branch and,
286f2ec16ccSHideki Saito //   3) its latch condition is a compare instruction whose operands are the
287f2ec16ccSHideki Saito //      canonical IV and an OuterLp invariant.
288f2ec16ccSHideki Saito // This check doesn't take into account the uniformity of other conditions not
289f2ec16ccSHideki Saito // related to the loop latch because they don't affect the loop uniformity.
290f2ec16ccSHideki Saito //
291f2ec16ccSHideki Saito // NOTE: We decided to keep all these checks and its associated documentation
292f2ec16ccSHideki Saito // together so that we can easily have a picture of the current supported loop
293f2ec16ccSHideki Saito // nests. However, some of the current checks don't depend on \p OuterLp and
294f2ec16ccSHideki Saito // would be redundantly executed for each \p Lp if we invoked this function for
295f2ec16ccSHideki Saito // different candidate outer loops. This is not the case for now because we
296f2ec16ccSHideki Saito // don't currently have the infrastructure to evaluate multiple candidate outer
297f2ec16ccSHideki Saito // loops and \p OuterLp will be a fixed parameter while we only support explicit
298f2ec16ccSHideki Saito // outer loop vectorization. It's also very likely that these checks go away
299f2ec16ccSHideki Saito // before introducing the aforementioned infrastructure. However, if this is not
300f2ec16ccSHideki Saito // the case, we should move the \p OuterLp independent checks to a separate
301f2ec16ccSHideki Saito // function that is only executed once for each \p Lp.
302f2ec16ccSHideki Saito static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
303f2ec16ccSHideki Saito   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
304f2ec16ccSHideki Saito 
305f2ec16ccSHideki Saito   // If Lp is the outer loop, it's uniform by definition.
306f2ec16ccSHideki Saito   if (Lp == OuterLp)
307f2ec16ccSHideki Saito     return true;
308f2ec16ccSHideki Saito   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
309f2ec16ccSHideki Saito 
310f2ec16ccSHideki Saito   // 1.
311f2ec16ccSHideki Saito   PHINode *IV = Lp->getCanonicalInductionVariable();
312f2ec16ccSHideki Saito   if (!IV) {
313d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
314f2ec16ccSHideki Saito     return false;
315f2ec16ccSHideki Saito   }
316f2ec16ccSHideki Saito 
317f2ec16ccSHideki Saito   // 2.
318f2ec16ccSHideki Saito   BasicBlock *Latch = Lp->getLoopLatch();
319f2ec16ccSHideki Saito   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
320f2ec16ccSHideki Saito   if (!LatchBr || LatchBr->isUnconditional()) {
321d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
322f2ec16ccSHideki Saito     return false;
323f2ec16ccSHideki Saito   }
324f2ec16ccSHideki Saito 
325f2ec16ccSHideki Saito   // 3.
326f2ec16ccSHideki Saito   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
327f2ec16ccSHideki Saito   if (!LatchCmp) {
328d34e60caSNicola Zaghen     LLVM_DEBUG(
329d34e60caSNicola Zaghen         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
330f2ec16ccSHideki Saito     return false;
331f2ec16ccSHideki Saito   }
332f2ec16ccSHideki Saito 
333f2ec16ccSHideki Saito   Value *CondOp0 = LatchCmp->getOperand(0);
334f2ec16ccSHideki Saito   Value *CondOp1 = LatchCmp->getOperand(1);
335f2ec16ccSHideki Saito   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
336f2ec16ccSHideki Saito   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
337f2ec16ccSHideki Saito       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
338d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
339f2ec16ccSHideki Saito     return false;
340f2ec16ccSHideki Saito   }
341f2ec16ccSHideki Saito 
342f2ec16ccSHideki Saito   return true;
343f2ec16ccSHideki Saito }
344f2ec16ccSHideki Saito 
345f2ec16ccSHideki Saito // Return true if \p Lp and all its nested loops are uniform with regard to \p
346f2ec16ccSHideki Saito // OuterLp.
347f2ec16ccSHideki Saito static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
348f2ec16ccSHideki Saito   if (!isUniformLoop(Lp, OuterLp))
349f2ec16ccSHideki Saito     return false;
350f2ec16ccSHideki Saito 
351f2ec16ccSHideki Saito   // Check if nested loops are uniform.
352f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
353f2ec16ccSHideki Saito     if (!isUniformLoopNest(SubLp, OuterLp))
354f2ec16ccSHideki Saito       return false;
355f2ec16ccSHideki Saito 
356f2ec16ccSHideki Saito   return true;
357f2ec16ccSHideki Saito }
358f2ec16ccSHideki Saito 
3595f8f34e4SAdrian Prantl /// Check whether it is safe to if-convert this phi node.
360f2ec16ccSHideki Saito ///
361f2ec16ccSHideki Saito /// Phi nodes with constant expressions that can trap are not safe to if
362f2ec16ccSHideki Saito /// convert.
363f2ec16ccSHideki Saito static bool canIfConvertPHINodes(BasicBlock *BB) {
364f2ec16ccSHideki Saito   for (PHINode &Phi : BB->phis()) {
365f2ec16ccSHideki Saito     for (Value *V : Phi.incoming_values())
366f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(V))
367f2ec16ccSHideki Saito         if (C->canTrap())
368f2ec16ccSHideki Saito           return false;
369f2ec16ccSHideki Saito   }
370f2ec16ccSHideki Saito   return true;
371f2ec16ccSHideki Saito }
372f2ec16ccSHideki Saito 
373f2ec16ccSHideki Saito static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
374f2ec16ccSHideki Saito   if (Ty->isPointerTy())
375f2ec16ccSHideki Saito     return DL.getIntPtrType(Ty);
376f2ec16ccSHideki Saito 
377f2ec16ccSHideki Saito   // It is possible that char's or short's overflow when we ask for the loop's
378f2ec16ccSHideki Saito   // trip count, work around this by changing the type size.
379f2ec16ccSHideki Saito   if (Ty->getScalarSizeInBits() < 32)
380f2ec16ccSHideki Saito     return Type::getInt32Ty(Ty->getContext());
381f2ec16ccSHideki Saito 
382f2ec16ccSHideki Saito   return Ty;
383f2ec16ccSHideki Saito }
384f2ec16ccSHideki Saito 
385f2ec16ccSHideki Saito static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
386f2ec16ccSHideki Saito   Ty0 = convertPointerToIntegerType(DL, Ty0);
387f2ec16ccSHideki Saito   Ty1 = convertPointerToIntegerType(DL, Ty1);
388f2ec16ccSHideki Saito   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
389f2ec16ccSHideki Saito     return Ty0;
390f2ec16ccSHideki Saito   return Ty1;
391f2ec16ccSHideki Saito }
392f2ec16ccSHideki Saito 
3935f8f34e4SAdrian Prantl /// Check that the instruction has outside loop users and is not an
394f2ec16ccSHideki Saito /// identified reduction variable.
395f2ec16ccSHideki Saito static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
396f2ec16ccSHideki Saito                                SmallPtrSetImpl<Value *> &AllowedExit) {
39760a1e4ddSAnna Thomas   // Reductions, Inductions and non-header phis are allowed to have exit users. All
398f2ec16ccSHideki Saito   // other instructions must not have external users.
399f2ec16ccSHideki Saito   if (!AllowedExit.count(Inst))
400f2ec16ccSHideki Saito     // Check that all of the users of the loop are inside the BB.
401f2ec16ccSHideki Saito     for (User *U : Inst->users()) {
402f2ec16ccSHideki Saito       Instruction *UI = cast<Instruction>(U);
403f2ec16ccSHideki Saito       // This user may be a reduction exit value.
404f2ec16ccSHideki Saito       if (!TheLoop->contains(UI)) {
405d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
406f2ec16ccSHideki Saito         return true;
407f2ec16ccSHideki Saito       }
408f2ec16ccSHideki Saito     }
409f2ec16ccSHideki Saito   return false;
410f2ec16ccSHideki Saito }
411f2ec16ccSHideki Saito 
412f2ec16ccSHideki Saito int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
413f2ec16ccSHideki Saito   const ValueToValueMap &Strides =
414f2ec16ccSHideki Saito       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
415f2ec16ccSHideki Saito 
416*7bedae7dSHiroshi Yamauchi   Function *F = TheLoop->getHeader()->getParent();
417*7bedae7dSHiroshi Yamauchi   bool OptForSize = F->hasOptSize() ||
418*7bedae7dSHiroshi Yamauchi                     llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
419*7bedae7dSHiroshi Yamauchi                                                 PGSOQueryType::IRPass);
420*7bedae7dSHiroshi Yamauchi   bool CanAddPredicate = !OptForSize;
421d1170dbeSSjoerd Meijer   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
422f2ec16ccSHideki Saito   if (Stride == 1 || Stride == -1)
423f2ec16ccSHideki Saito     return Stride;
424f2ec16ccSHideki Saito   return 0;
425f2ec16ccSHideki Saito }
426f2ec16ccSHideki Saito 
427f2ec16ccSHideki Saito bool LoopVectorizationLegality::isUniform(Value *V) {
428f2ec16ccSHideki Saito   return LAI->isUniform(V);
429f2ec16ccSHideki Saito }
430f2ec16ccSHideki Saito 
431f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeOuterLoop() {
432f2ec16ccSHideki Saito   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
433f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
434f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
435f2ec16ccSHideki Saito   bool Result = true;
436f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
437f2ec16ccSHideki Saito 
438f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
439f2ec16ccSHideki Saito     // Check whether the BB terminator is a BranchInst. Any other terminator is
440f2ec16ccSHideki Saito     // not supported yet.
441f2ec16ccSHideki Saito     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
442f2ec16ccSHideki Saito     if (!Br) {
4439e97caf5SRenato Golin       reportVectorizationFailure("Unsupported basic block terminator",
4449e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
445ec818d7fSHideki Saito           "CFGNotUnderstood", ORE, TheLoop);
446f2ec16ccSHideki Saito       if (DoExtraAnalysis)
447f2ec16ccSHideki Saito         Result = false;
448f2ec16ccSHideki Saito       else
449f2ec16ccSHideki Saito         return false;
450f2ec16ccSHideki Saito     }
451f2ec16ccSHideki Saito 
452f2ec16ccSHideki Saito     // Check whether the BranchInst is a supported one. Only unconditional
453f2ec16ccSHideki Saito     // branches, conditional branches with an outer loop invariant condition or
454f2ec16ccSHideki Saito     // backedges are supported.
4554e4ecae0SHideki Saito     // FIXME: We skip these checks when VPlan predication is enabled as we
4564e4ecae0SHideki Saito     // want to allow divergent branches. This whole check will be removed
4574e4ecae0SHideki Saito     // once VPlan predication is on by default.
4584e4ecae0SHideki Saito     if (!EnableVPlanPredication && Br && Br->isConditional() &&
459f2ec16ccSHideki Saito         !TheLoop->isLoopInvariant(Br->getCondition()) &&
460f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(0)) &&
461f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(1))) {
4629e97caf5SRenato Golin       reportVectorizationFailure("Unsupported conditional branch",
4639e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
464ec818d7fSHideki Saito           "CFGNotUnderstood", ORE, TheLoop);
465f2ec16ccSHideki Saito       if (DoExtraAnalysis)
466f2ec16ccSHideki Saito         Result = false;
467f2ec16ccSHideki Saito       else
468f2ec16ccSHideki Saito         return false;
469f2ec16ccSHideki Saito     }
470f2ec16ccSHideki Saito   }
471f2ec16ccSHideki Saito 
472f2ec16ccSHideki Saito   // Check whether inner loops are uniform. At this point, we only support
473f2ec16ccSHideki Saito   // simple outer loops scenarios with uniform nested loops.
474f2ec16ccSHideki Saito   if (!isUniformLoopNest(TheLoop /*loop nest*/,
475f2ec16ccSHideki Saito                          TheLoop /*context outer loop*/)) {
4769e97caf5SRenato Golin     reportVectorizationFailure("Outer loop contains divergent loops",
4779e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
478ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
479f2ec16ccSHideki Saito     if (DoExtraAnalysis)
480f2ec16ccSHideki Saito       Result = false;
481f2ec16ccSHideki Saito     else
482f2ec16ccSHideki Saito       return false;
483f2ec16ccSHideki Saito   }
484f2ec16ccSHideki Saito 
485ea7f3035SHideki Saito   // Check whether we are able to set up outer loop induction.
486ea7f3035SHideki Saito   if (!setupOuterLoopInductions()) {
4879e97caf5SRenato Golin     reportVectorizationFailure("Unsupported outer loop Phi(s)",
4889e97caf5SRenato Golin                                "Unsupported outer loop Phi(s)",
489ec818d7fSHideki Saito                                "UnsupportedPhi", ORE, TheLoop);
490ea7f3035SHideki Saito     if (DoExtraAnalysis)
491ea7f3035SHideki Saito       Result = false;
492ea7f3035SHideki Saito     else
493ea7f3035SHideki Saito       return false;
494ea7f3035SHideki Saito   }
495ea7f3035SHideki Saito 
496f2ec16ccSHideki Saito   return Result;
497f2ec16ccSHideki Saito }
498f2ec16ccSHideki Saito 
499f2ec16ccSHideki Saito void LoopVectorizationLegality::addInductionPhi(
500f2ec16ccSHideki Saito     PHINode *Phi, const InductionDescriptor &ID,
501f2ec16ccSHideki Saito     SmallPtrSetImpl<Value *> &AllowedExit) {
502f2ec16ccSHideki Saito   Inductions[Phi] = ID;
503f2ec16ccSHideki Saito 
504f2ec16ccSHideki Saito   // In case this induction also comes with casts that we know we can ignore
505f2ec16ccSHideki Saito   // in the vectorized loop body, record them here. All casts could be recorded
506f2ec16ccSHideki Saito   // here for ignoring, but suffices to record only the first (as it is the
507f2ec16ccSHideki Saito   // only one that may bw used outside the cast sequence).
508f2ec16ccSHideki Saito   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
509f2ec16ccSHideki Saito   if (!Casts.empty())
510f2ec16ccSHideki Saito     InductionCastsToIgnore.insert(*Casts.begin());
511f2ec16ccSHideki Saito 
512f2ec16ccSHideki Saito   Type *PhiTy = Phi->getType();
513f2ec16ccSHideki Saito   const DataLayout &DL = Phi->getModule()->getDataLayout();
514f2ec16ccSHideki Saito 
515f2ec16ccSHideki Saito   // Get the widest type.
516f2ec16ccSHideki Saito   if (!PhiTy->isFloatingPointTy()) {
517f2ec16ccSHideki Saito     if (!WidestIndTy)
518f2ec16ccSHideki Saito       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
519f2ec16ccSHideki Saito     else
520f2ec16ccSHideki Saito       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
521f2ec16ccSHideki Saito   }
522f2ec16ccSHideki Saito 
523f2ec16ccSHideki Saito   // Int inductions are special because we only allow one IV.
524f2ec16ccSHideki Saito   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
525f2ec16ccSHideki Saito       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
526f2ec16ccSHideki Saito       isa<Constant>(ID.getStartValue()) &&
527f2ec16ccSHideki Saito       cast<Constant>(ID.getStartValue())->isNullValue()) {
528f2ec16ccSHideki Saito 
529f2ec16ccSHideki Saito     // Use the phi node with the widest type as induction. Use the last
530f2ec16ccSHideki Saito     // one if there are multiple (no good reason for doing this other
531f2ec16ccSHideki Saito     // than it is expedient). We've checked that it begins at zero and
532f2ec16ccSHideki Saito     // steps by one, so this is a canonical induction variable.
533f2ec16ccSHideki Saito     if (!PrimaryInduction || PhiTy == WidestIndTy)
534f2ec16ccSHideki Saito       PrimaryInduction = Phi;
535f2ec16ccSHideki Saito   }
536f2ec16ccSHideki Saito 
537f2ec16ccSHideki Saito   // Both the PHI node itself, and the "post-increment" value feeding
538f2ec16ccSHideki Saito   // back into the PHI node may have external users.
539f2ec16ccSHideki Saito   // We can allow those uses, except if the SCEVs we have for them rely
540f2ec16ccSHideki Saito   // on predicates that only hold within the loop, since allowing the exit
5416a1dd77fSAnna Thomas   // currently means re-using this SCEV outside the loop (see PR33706 for more
5426a1dd77fSAnna Thomas   // details).
543f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().isAlwaysTrue()) {
544f2ec16ccSHideki Saito     AllowedExit.insert(Phi);
545f2ec16ccSHideki Saito     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
546f2ec16ccSHideki Saito   }
547f2ec16ccSHideki Saito 
548d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
549f2ec16ccSHideki Saito }
550f2ec16ccSHideki Saito 
551ea7f3035SHideki Saito bool LoopVectorizationLegality::setupOuterLoopInductions() {
552ea7f3035SHideki Saito   BasicBlock *Header = TheLoop->getHeader();
553ea7f3035SHideki Saito 
554ea7f3035SHideki Saito   // Returns true if a given Phi is a supported induction.
555ea7f3035SHideki Saito   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
556ea7f3035SHideki Saito     InductionDescriptor ID;
557ea7f3035SHideki Saito     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
558ea7f3035SHideki Saito         ID.getKind() == InductionDescriptor::IK_IntInduction) {
559ea7f3035SHideki Saito       addInductionPhi(&Phi, ID, AllowedExit);
560ea7f3035SHideki Saito       return true;
561ea7f3035SHideki Saito     } else {
562ea7f3035SHideki Saito       // Bail out for any Phi in the outer loop header that is not a supported
563ea7f3035SHideki Saito       // induction.
564ea7f3035SHideki Saito       LLVM_DEBUG(
565ea7f3035SHideki Saito           dbgs()
566ea7f3035SHideki Saito           << "LV: Found unsupported PHI for outer loop vectorization.\n");
567ea7f3035SHideki Saito       return false;
568ea7f3035SHideki Saito     }
569ea7f3035SHideki Saito   };
570ea7f3035SHideki Saito 
571ea7f3035SHideki Saito   if (llvm::all_of(Header->phis(), isSupportedPhi))
572ea7f3035SHideki Saito     return true;
573ea7f3035SHideki Saito   else
574ea7f3035SHideki Saito     return false;
575ea7f3035SHideki Saito }
576ea7f3035SHideki Saito 
57766c120f0SFrancesco Petrogalli /// Checks if a function is scalarizable according to the TLI, in
57866c120f0SFrancesco Petrogalli /// the sense that it should be vectorized and then expanded in
57966c120f0SFrancesco Petrogalli /// multiple scalarcalls. This is represented in the
58066c120f0SFrancesco Petrogalli /// TLI via mappings that do not specify a vector name, as in the
58166c120f0SFrancesco Petrogalli /// following example:
58266c120f0SFrancesco Petrogalli ///
58366c120f0SFrancesco Petrogalli ///    const VecDesc VecIntrinsics[] = {
58466c120f0SFrancesco Petrogalli ///      {"llvm.phx.abs.i32", "", 4}
58566c120f0SFrancesco Petrogalli ///    };
58666c120f0SFrancesco Petrogalli static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
58766c120f0SFrancesco Petrogalli   const StringRef ScalarName = CI.getCalledFunction()->getName();
58866c120f0SFrancesco Petrogalli   bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
58966c120f0SFrancesco Petrogalli   // Check that all known VFs are not associated to a vector
59066c120f0SFrancesco Petrogalli   // function, i.e. the vector name is emty.
59166c120f0SFrancesco Petrogalli   if (Scalarize)
59266c120f0SFrancesco Petrogalli     for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName);
59366c120f0SFrancesco Petrogalli          VF <= WidestVF; VF *= 2) {
59466c120f0SFrancesco Petrogalli       Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
59566c120f0SFrancesco Petrogalli     }
59666c120f0SFrancesco Petrogalli   return Scalarize;
59766c120f0SFrancesco Petrogalli }
59866c120f0SFrancesco Petrogalli 
599f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeInstrs() {
600f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
601f2ec16ccSHideki Saito 
602f2ec16ccSHideki Saito   // Look for the attribute signaling the absence of NaNs.
603f2ec16ccSHideki Saito   Function &F = *Header->getParent();
604f2ec16ccSHideki Saito   HasFunNoNaNAttr =
605f2ec16ccSHideki Saito       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
606f2ec16ccSHideki Saito 
607f2ec16ccSHideki Saito   // For each block in the loop.
608f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
609f2ec16ccSHideki Saito     // Scan the instructions in the block and look for hazards.
610f2ec16ccSHideki Saito     for (Instruction &I : *BB) {
611f2ec16ccSHideki Saito       if (auto *Phi = dyn_cast<PHINode>(&I)) {
612f2ec16ccSHideki Saito         Type *PhiTy = Phi->getType();
613f2ec16ccSHideki Saito         // Check that this PHI type is allowed.
614f2ec16ccSHideki Saito         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
615f2ec16ccSHideki Saito             !PhiTy->isPointerTy()) {
6169e97caf5SRenato Golin           reportVectorizationFailure("Found a non-int non-pointer PHI",
6179e97caf5SRenato Golin                                      "loop control flow is not understood by vectorizer",
618ec818d7fSHideki Saito                                      "CFGNotUnderstood", ORE, TheLoop);
619f2ec16ccSHideki Saito           return false;
620f2ec16ccSHideki Saito         }
621f2ec16ccSHideki Saito 
622f2ec16ccSHideki Saito         // If this PHINode is not in the header block, then we know that we
623f2ec16ccSHideki Saito         // can convert it to select during if-conversion. No need to check if
624f2ec16ccSHideki Saito         // the PHIs in this block are induction or reduction variables.
625f2ec16ccSHideki Saito         if (BB != Header) {
62660a1e4ddSAnna Thomas           // Non-header phi nodes that have outside uses can be vectorized. Add
62760a1e4ddSAnna Thomas           // them to the list of allowed exits.
62860a1e4ddSAnna Thomas           // Unsafe cyclic dependencies with header phis are identified during
62960a1e4ddSAnna Thomas           // legalization for reduction, induction and first order
63060a1e4ddSAnna Thomas           // recurrences.
631dd18ce45SBjorn Pettersson           AllowedExit.insert(&I);
632f2ec16ccSHideki Saito           continue;
633f2ec16ccSHideki Saito         }
634f2ec16ccSHideki Saito 
635f2ec16ccSHideki Saito         // We only allow if-converted PHIs with exactly two incoming values.
636f2ec16ccSHideki Saito         if (Phi->getNumIncomingValues() != 2) {
6379e97caf5SRenato Golin           reportVectorizationFailure("Found an invalid PHI",
6389e97caf5SRenato Golin               "loop control flow is not understood by vectorizer",
639ec818d7fSHideki Saito               "CFGNotUnderstood", ORE, TheLoop, Phi);
640f2ec16ccSHideki Saito           return false;
641f2ec16ccSHideki Saito         }
642f2ec16ccSHideki Saito 
643f2ec16ccSHideki Saito         RecurrenceDescriptor RedDes;
644f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
645f2ec16ccSHideki Saito                                                  DT)) {
646f2ec16ccSHideki Saito           if (RedDes.hasUnsafeAlgebra())
647f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
648f2ec16ccSHideki Saito           AllowedExit.insert(RedDes.getLoopExitInstr());
649f2ec16ccSHideki Saito           Reductions[Phi] = RedDes;
650f2ec16ccSHideki Saito           continue;
651f2ec16ccSHideki Saito         }
652f2ec16ccSHideki Saito 
653b02b0ad8SAnna Thomas         // TODO: Instead of recording the AllowedExit, it would be good to record the
654b02b0ad8SAnna Thomas         // complementary set: NotAllowedExit. These include (but may not be
655b02b0ad8SAnna Thomas         // limited to):
656b02b0ad8SAnna Thomas         // 1. Reduction phis as they represent the one-before-last value, which
657b02b0ad8SAnna Thomas         // is not available when vectorized
658b02b0ad8SAnna Thomas         // 2. Induction phis and increment when SCEV predicates cannot be used
659b02b0ad8SAnna Thomas         // outside the loop - see addInductionPhi
660b02b0ad8SAnna Thomas         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
661b02b0ad8SAnna Thomas         // outside the loop - see call to hasOutsideLoopUser in the non-phi
662b02b0ad8SAnna Thomas         // handling below
663b02b0ad8SAnna Thomas         // 4. FirstOrderRecurrence phis that can possibly be handled by
664b02b0ad8SAnna Thomas         // extraction.
665b02b0ad8SAnna Thomas         // By recording these, we can then reason about ways to vectorize each
666b02b0ad8SAnna Thomas         // of these NotAllowedExit.
667f2ec16ccSHideki Saito         InductionDescriptor ID;
668f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
669f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
670f2ec16ccSHideki Saito           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
671f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
672f2ec16ccSHideki Saito           continue;
673f2ec16ccSHideki Saito         }
674f2ec16ccSHideki Saito 
675f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
676f2ec16ccSHideki Saito                                                          SinkAfter, DT)) {
6778e0c5f72SAyal Zaks           AllowedExit.insert(Phi);
678f2ec16ccSHideki Saito           FirstOrderRecurrences.insert(Phi);
679f2ec16ccSHideki Saito           continue;
680f2ec16ccSHideki Saito         }
681f2ec16ccSHideki Saito 
682f2ec16ccSHideki Saito         // As a last resort, coerce the PHI to a AddRec expression
683f2ec16ccSHideki Saito         // and re-try classifying it a an induction PHI.
684f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
685f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
686f2ec16ccSHideki Saito           continue;
687f2ec16ccSHideki Saito         }
688f2ec16ccSHideki Saito 
6899e97caf5SRenato Golin         reportVectorizationFailure("Found an unidentified PHI",
6909e97caf5SRenato Golin             "value that could not be identified as "
6919e97caf5SRenato Golin             "reduction is used outside the loop",
692ec818d7fSHideki Saito             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
693f2ec16ccSHideki Saito         return false;
694f2ec16ccSHideki Saito       } // end of PHI handling
695f2ec16ccSHideki Saito 
696f2ec16ccSHideki Saito       // We handle calls that:
697f2ec16ccSHideki Saito       //   * Are debug info intrinsics.
698f2ec16ccSHideki Saito       //   * Have a mapping to an IR intrinsic.
699f2ec16ccSHideki Saito       //   * Have a vector version available.
700f2ec16ccSHideki Saito       auto *CI = dyn_cast<CallInst>(&I);
70166c120f0SFrancesco Petrogalli 
702f2ec16ccSHideki Saito       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
703f2ec16ccSHideki Saito           !isa<DbgInfoIntrinsic>(CI) &&
704f2ec16ccSHideki Saito           !(CI->getCalledFunction() && TLI &&
70566c120f0SFrancesco Petrogalli             (!VFDatabase::getMappings(*CI).empty() ||
70666c120f0SFrancesco Petrogalli              isTLIScalarize(*TLI, *CI)))) {
7077d65fe5cSSanjay Patel         // If the call is a recognized math libary call, it is likely that
7087d65fe5cSSanjay Patel         // we can vectorize it given loosened floating-point constraints.
7097d65fe5cSSanjay Patel         LibFunc Func;
7107d65fe5cSSanjay Patel         bool IsMathLibCall =
7117d65fe5cSSanjay Patel             TLI && CI->getCalledFunction() &&
7127d65fe5cSSanjay Patel             CI->getType()->isFloatingPointTy() &&
7137d65fe5cSSanjay Patel             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
7147d65fe5cSSanjay Patel             TLI->hasOptimizedCodeGen(Func);
7157d65fe5cSSanjay Patel 
7167d65fe5cSSanjay Patel         if (IsMathLibCall) {
7177d65fe5cSSanjay Patel           // TODO: Ideally, we should not use clang-specific language here,
7187d65fe5cSSanjay Patel           // but it's hard to provide meaningful yet generic advice.
7197d65fe5cSSanjay Patel           // Also, should this be guarded by allowExtraAnalysis() and/or be part
7207d65fe5cSSanjay Patel           // of the returned info from isFunctionVectorizable()?
72166c120f0SFrancesco Petrogalli           reportVectorizationFailure(
72266c120f0SFrancesco Petrogalli               "Found a non-intrinsic callsite",
7239e97caf5SRenato Golin               "library call cannot be vectorized. "
7247d65fe5cSSanjay Patel               "Try compiling with -fno-math-errno, -ffast-math, "
7259e97caf5SRenato Golin               "or similar flags",
726ec818d7fSHideki Saito               "CantVectorizeLibcall", ORE, TheLoop, CI);
7277d65fe5cSSanjay Patel         } else {
7289e97caf5SRenato Golin           reportVectorizationFailure("Found a non-intrinsic callsite",
7299e97caf5SRenato Golin                                      "call instruction cannot be vectorized",
730ec818d7fSHideki Saito                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
7317d65fe5cSSanjay Patel         }
732f2ec16ccSHideki Saito         return false;
733f2ec16ccSHideki Saito       }
734f2ec16ccSHideki Saito 
735a066f1f9SSimon Pilgrim       // Some intrinsics have scalar arguments and should be same in order for
736a066f1f9SSimon Pilgrim       // them to be vectorized (i.e. loop invariant).
737a066f1f9SSimon Pilgrim       if (CI) {
738f2ec16ccSHideki Saito         auto *SE = PSE.getSE();
739a066f1f9SSimon Pilgrim         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
740a066f1f9SSimon Pilgrim         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
741a066f1f9SSimon Pilgrim           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
742a066f1f9SSimon Pilgrim             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
7439e97caf5SRenato Golin               reportVectorizationFailure("Found unvectorizable intrinsic",
7449e97caf5SRenato Golin                   "intrinsic instruction cannot be vectorized",
745ec818d7fSHideki Saito                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
746f2ec16ccSHideki Saito               return false;
747f2ec16ccSHideki Saito             }
748f2ec16ccSHideki Saito           }
749a066f1f9SSimon Pilgrim       }
750f2ec16ccSHideki Saito 
751f2ec16ccSHideki Saito       // Check that the instruction return type is vectorizable.
752f2ec16ccSHideki Saito       // Also, we can't vectorize extractelement instructions.
753f2ec16ccSHideki Saito       if ((!VectorType::isValidElementType(I.getType()) &&
754f2ec16ccSHideki Saito            !I.getType()->isVoidTy()) ||
755f2ec16ccSHideki Saito           isa<ExtractElementInst>(I)) {
7569e97caf5SRenato Golin         reportVectorizationFailure("Found unvectorizable type",
7579e97caf5SRenato Golin             "instruction return type cannot be vectorized",
758ec818d7fSHideki Saito             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
759f2ec16ccSHideki Saito         return false;
760f2ec16ccSHideki Saito       }
761f2ec16ccSHideki Saito 
762f2ec16ccSHideki Saito       // Check that the stored type is vectorizable.
763f2ec16ccSHideki Saito       if (auto *ST = dyn_cast<StoreInst>(&I)) {
764f2ec16ccSHideki Saito         Type *T = ST->getValueOperand()->getType();
765f2ec16ccSHideki Saito         if (!VectorType::isValidElementType(T)) {
7669e97caf5SRenato Golin           reportVectorizationFailure("Store instruction cannot be vectorized",
7679e97caf5SRenato Golin                                      "store instruction cannot be vectorized",
768ec818d7fSHideki Saito                                      "CantVectorizeStore", ORE, TheLoop, ST);
769f2ec16ccSHideki Saito           return false;
770f2ec16ccSHideki Saito         }
771f2ec16ccSHideki Saito 
7726452bdd2SWarren Ristow         // For nontemporal stores, check that a nontemporal vector version is
7736452bdd2SWarren Ristow         // supported on the target.
7746452bdd2SWarren Ristow         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
7756452bdd2SWarren Ristow           // Arbitrarily try a vector of 2 elements.
776d2befc66SChristopher Tetreault           auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2);
7776452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of stored type");
77852e98f62SNikita Popov           if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
7796452bdd2SWarren Ristow             reportVectorizationFailure(
7806452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
7816452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
782ec818d7fSHideki Saito                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
7836452bdd2SWarren Ristow             return false;
7846452bdd2SWarren Ristow           }
7856452bdd2SWarren Ristow         }
7866452bdd2SWarren Ristow 
7876452bdd2SWarren Ristow       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
7886452bdd2SWarren Ristow         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
7896452bdd2SWarren Ristow           // For nontemporal loads, check that a nontemporal vector version is
7906452bdd2SWarren Ristow           // supported on the target (arbitrarily try a vector of 2 elements).
791d2befc66SChristopher Tetreault           auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2);
7926452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of load type");
79352e98f62SNikita Popov           if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
7946452bdd2SWarren Ristow             reportVectorizationFailure(
7956452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
7966452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
797ec818d7fSHideki Saito                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
7986452bdd2SWarren Ristow             return false;
7996452bdd2SWarren Ristow           }
8006452bdd2SWarren Ristow         }
8016452bdd2SWarren Ristow 
802f2ec16ccSHideki Saito         // FP instructions can allow unsafe algebra, thus vectorizable by
803f2ec16ccSHideki Saito         // non-IEEE-754 compliant SIMD units.
804f2ec16ccSHideki Saito         // This applies to floating-point math operations and calls, not memory
805f2ec16ccSHideki Saito         // operations, shuffles, or casts, as they don't change precision or
806f2ec16ccSHideki Saito         // semantics.
807f2ec16ccSHideki Saito       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
808f2ec16ccSHideki Saito                  !I.isFast()) {
809d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
810f2ec16ccSHideki Saito         Hints->setPotentiallyUnsafe();
811f2ec16ccSHideki Saito       }
812f2ec16ccSHideki Saito 
813f2ec16ccSHideki Saito       // Reduction instructions are allowed to have exit users.
814f2ec16ccSHideki Saito       // All other instructions must not have external users.
815f2ec16ccSHideki Saito       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
816b02b0ad8SAnna Thomas         // We can safely vectorize loops where instructions within the loop are
817b02b0ad8SAnna Thomas         // used outside the loop only if the SCEV predicates within the loop is
818b02b0ad8SAnna Thomas         // same as outside the loop. Allowing the exit means reusing the SCEV
819b02b0ad8SAnna Thomas         // outside the loop.
820b02b0ad8SAnna Thomas         if (PSE.getUnionPredicate().isAlwaysTrue()) {
821b02b0ad8SAnna Thomas           AllowedExit.insert(&I);
822b02b0ad8SAnna Thomas           continue;
823b02b0ad8SAnna Thomas         }
8249e97caf5SRenato Golin         reportVectorizationFailure("Value cannot be used outside the loop",
8259e97caf5SRenato Golin                                    "value cannot be used outside the loop",
826ec818d7fSHideki Saito                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
827f2ec16ccSHideki Saito         return false;
828f2ec16ccSHideki Saito       }
829f2ec16ccSHideki Saito     } // next instr.
830f2ec16ccSHideki Saito   }
831f2ec16ccSHideki Saito 
832f2ec16ccSHideki Saito   if (!PrimaryInduction) {
833f2ec16ccSHideki Saito     if (Inductions.empty()) {
8349e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8359e97caf5SRenato Golin           "loop induction variable could not be identified",
836ec818d7fSHideki Saito           "NoInductionVariable", ORE, TheLoop);
837f2ec16ccSHideki Saito       return false;
8384f27730eSWarren Ristow     } else if (!WidestIndTy) {
8399e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8409e97caf5SRenato Golin           "integer loop induction variable could not be identified",
841ec818d7fSHideki Saito           "NoIntegerInductionVariable", ORE, TheLoop);
8424f27730eSWarren Ristow       return false;
8439e97caf5SRenato Golin     } else {
8449e97caf5SRenato Golin       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
845f2ec16ccSHideki Saito     }
846f2ec16ccSHideki Saito   }
847f2ec16ccSHideki Saito 
8489d24933fSFlorian Hahn   // For first order recurrences, we use the previous value (incoming value from
8499d24933fSFlorian Hahn   // the latch) to check if it dominates all users of the recurrence. Bail out
8509d24933fSFlorian Hahn   // if we have to sink such an instruction for another recurrence, as the
8519d24933fSFlorian Hahn   // dominance requirement may not hold after sinking.
8529d24933fSFlorian Hahn   BasicBlock *LoopLatch = TheLoop->getLoopLatch();
8539d24933fSFlorian Hahn   if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
8549d24933fSFlorian Hahn         Instruction *V =
8559d24933fSFlorian Hahn             cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
8569d24933fSFlorian Hahn         return SinkAfter.find(V) != SinkAfter.end();
8579d24933fSFlorian Hahn       }))
8589d24933fSFlorian Hahn     return false;
8599d24933fSFlorian Hahn 
860f2ec16ccSHideki Saito   // Now we know the widest induction type, check if our found induction
861f2ec16ccSHideki Saito   // is the same size. If it's not, unset it here and InnerLoopVectorizer
862f2ec16ccSHideki Saito   // will create another.
863f2ec16ccSHideki Saito   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
864f2ec16ccSHideki Saito     PrimaryInduction = nullptr;
865f2ec16ccSHideki Saito 
866f2ec16ccSHideki Saito   return true;
867f2ec16ccSHideki Saito }
868f2ec16ccSHideki Saito 
869f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeMemory() {
870f2ec16ccSHideki Saito   LAI = &(*GetLAA)(*TheLoop);
871f2ec16ccSHideki Saito   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
872f2ec16ccSHideki Saito   if (LAR) {
873f2ec16ccSHideki Saito     ORE->emit([&]() {
874f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
875f2ec16ccSHideki Saito                                         "loop not vectorized: ", *LAR);
876f2ec16ccSHideki Saito     });
877f2ec16ccSHideki Saito   }
878f2ec16ccSHideki Saito   if (!LAI->canVectorizeMemory())
879f2ec16ccSHideki Saito     return false;
880f2ec16ccSHideki Saito 
8815e9215f0SAnna Thomas   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
8829e97caf5SRenato Golin     reportVectorizationFailure("Stores to a uniform address",
8839e97caf5SRenato Golin         "write to a loop invariant address could not be vectorized",
884ec818d7fSHideki Saito         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
885f2ec16ccSHideki Saito     return false;
886f2ec16ccSHideki Saito   }
887f2ec16ccSHideki Saito   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
888f2ec16ccSHideki Saito   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
889f2ec16ccSHideki Saito 
890f2ec16ccSHideki Saito   return true;
891f2ec16ccSHideki Saito }
892f2ec16ccSHideki Saito 
893f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
894f2ec16ccSHideki Saito   Value *In0 = const_cast<Value *>(V);
895f2ec16ccSHideki Saito   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
896f2ec16ccSHideki Saito   if (!PN)
897f2ec16ccSHideki Saito     return false;
898f2ec16ccSHideki Saito 
899f2ec16ccSHideki Saito   return Inductions.count(PN);
900f2ec16ccSHideki Saito }
901f2ec16ccSHideki Saito 
902f2ec16ccSHideki Saito bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
903f2ec16ccSHideki Saito   auto *Inst = dyn_cast<Instruction>(V);
904f2ec16ccSHideki Saito   return (Inst && InductionCastsToIgnore.count(Inst));
905f2ec16ccSHideki Saito }
906f2ec16ccSHideki Saito 
907f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
908f2ec16ccSHideki Saito   return isInductionPhi(V) || isCastedInductionVariable(V);
909f2ec16ccSHideki Saito }
910f2ec16ccSHideki Saito 
911f2ec16ccSHideki Saito bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
912f2ec16ccSHideki Saito   return FirstOrderRecurrences.count(Phi);
913f2ec16ccSHideki Saito }
914f2ec16ccSHideki Saito 
915f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
916f2ec16ccSHideki Saito   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
917f2ec16ccSHideki Saito }
918f2ec16ccSHideki Saito 
919f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockCanBePredicated(
920d57d73daSDorit Nuzman     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
921f2ec16ccSHideki Saito   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
922f2ec16ccSHideki Saito 
923f2ec16ccSHideki Saito   for (Instruction &I : *BB) {
924f2ec16ccSHideki Saito     // Check that we don't have a constant expression that can trap as operand.
925f2ec16ccSHideki Saito     for (Value *Operand : I.operands()) {
926f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(Operand))
927f2ec16ccSHideki Saito         if (C->canTrap())
928f2ec16ccSHideki Saito           return false;
929f2ec16ccSHideki Saito     }
93023c11380SFlorian Hahn 
93123c11380SFlorian Hahn     // We can predicate blocks with calls to assume, as long as we drop them in
93223c11380SFlorian Hahn     // case we flatten the CFG via predication.
93323c11380SFlorian Hahn     if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
93423c11380SFlorian Hahn       ConditionalAssumes.insert(&I);
93523c11380SFlorian Hahn       continue;
93623c11380SFlorian Hahn     }
93723c11380SFlorian Hahn 
938f2ec16ccSHideki Saito     // We might be able to hoist the load.
939f2ec16ccSHideki Saito     if (I.mayReadFromMemory()) {
940f2ec16ccSHideki Saito       auto *LI = dyn_cast<LoadInst>(&I);
941f2ec16ccSHideki Saito       if (!LI)
942f2ec16ccSHideki Saito         return false;
943f2ec16ccSHideki Saito       if (!SafePtrs.count(LI->getPointerOperand())) {
944f2ec16ccSHideki Saito         // !llvm.mem.parallel_loop_access implies if-conversion safety.
945f2ec16ccSHideki Saito         // Otherwise, record that the load needs (real or emulated) masking
946f2ec16ccSHideki Saito         // and let the cost model decide.
947d57d73daSDorit Nuzman         if (!IsAnnotatedParallel || PreserveGuards)
948f2ec16ccSHideki Saito           MaskedOp.insert(LI);
949f2ec16ccSHideki Saito         continue;
950f2ec16ccSHideki Saito       }
951f2ec16ccSHideki Saito     }
952f2ec16ccSHideki Saito 
953f2ec16ccSHideki Saito     if (I.mayWriteToMemory()) {
954f2ec16ccSHideki Saito       auto *SI = dyn_cast<StoreInst>(&I);
955f2ec16ccSHideki Saito       if (!SI)
956f2ec16ccSHideki Saito         return false;
957f2ec16ccSHideki Saito       // Predicated store requires some form of masking:
958f2ec16ccSHideki Saito       // 1) masked store HW instruction,
959f2ec16ccSHideki Saito       // 2) emulation via load-blend-store (only if safe and legal to do so,
960f2ec16ccSHideki Saito       //    be aware on the race conditions), or
961f2ec16ccSHideki Saito       // 3) element-by-element predicate check and scalar store.
962f2ec16ccSHideki Saito       MaskedOp.insert(SI);
963f2ec16ccSHideki Saito       continue;
964f2ec16ccSHideki Saito     }
965f2ec16ccSHideki Saito     if (I.mayThrow())
966f2ec16ccSHideki Saito       return false;
967f2ec16ccSHideki Saito   }
968f2ec16ccSHideki Saito 
969f2ec16ccSHideki Saito   return true;
970f2ec16ccSHideki Saito }
971f2ec16ccSHideki Saito 
972f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
973f2ec16ccSHideki Saito   if (!EnableIfConversion) {
9749e97caf5SRenato Golin     reportVectorizationFailure("If-conversion is disabled",
9759e97caf5SRenato Golin                                "if-conversion is disabled",
976ec818d7fSHideki Saito                                "IfConversionDisabled",
977ec818d7fSHideki Saito                                ORE, TheLoop);
978f2ec16ccSHideki Saito     return false;
979f2ec16ccSHideki Saito   }
980f2ec16ccSHideki Saito 
981f2ec16ccSHideki Saito   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
982f2ec16ccSHideki Saito 
983cf3b5559SPhilip Reames   // A list of pointers which are known to be dereferenceable within scope of
984cf3b5559SPhilip Reames   // the loop body for each iteration of the loop which executes.  That is,
985cf3b5559SPhilip Reames   // the memory pointed to can be dereferenced (with the access size implied by
986cf3b5559SPhilip Reames   // the value's type) unconditionally within the loop header without
987cf3b5559SPhilip Reames   // introducing a new fault.
9883bbc71d6SSjoerd Meijer   SmallPtrSet<Value *, 8> SafePointers;
989f2ec16ccSHideki Saito 
990f2ec16ccSHideki Saito   // Collect safe addresses.
991f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
9927403569bSPhilip Reames     if (!blockNeedsPredication(BB)) {
993f2ec16ccSHideki Saito       for (Instruction &I : *BB)
994f2ec16ccSHideki Saito         if (auto *Ptr = getLoadStorePointerOperand(&I))
9953bbc71d6SSjoerd Meijer           SafePointers.insert(Ptr);
9967403569bSPhilip Reames       continue;
9977403569bSPhilip Reames     }
9987403569bSPhilip Reames 
9997403569bSPhilip Reames     // For a block which requires predication, a address may be safe to access
10007403569bSPhilip Reames     // in the loop w/o predication if we can prove dereferenceability facts
10017403569bSPhilip Reames     // sufficient to ensure it'll never fault within the loop. For the moment,
10027403569bSPhilip Reames     // we restrict this to loads; stores are more complicated due to
10037403569bSPhilip Reames     // concurrency restrictions.
10047403569bSPhilip Reames     ScalarEvolution &SE = *PSE.getSE();
10057403569bSPhilip Reames     for (Instruction &I : *BB) {
10067403569bSPhilip Reames       LoadInst *LI = dyn_cast<LoadInst>(&I);
10077403569bSPhilip Reames       if (LI && !mustSuppressSpeculation(*LI) &&
10087403569bSPhilip Reames           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
10093bbc71d6SSjoerd Meijer         SafePointers.insert(LI->getPointerOperand());
10107403569bSPhilip Reames     }
1011f2ec16ccSHideki Saito   }
1012f2ec16ccSHideki Saito 
1013f2ec16ccSHideki Saito   // Collect the blocks that need predication.
1014f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
1015f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
1016f2ec16ccSHideki Saito     // We don't support switch statements inside loops.
1017f2ec16ccSHideki Saito     if (!isa<BranchInst>(BB->getTerminator())) {
10189e97caf5SRenato Golin       reportVectorizationFailure("Loop contains a switch statement",
10199e97caf5SRenato Golin                                  "loop contains a switch statement",
1020ec818d7fSHideki Saito                                  "LoopContainsSwitch", ORE, TheLoop,
1021ec818d7fSHideki Saito                                  BB->getTerminator());
1022f2ec16ccSHideki Saito       return false;
1023f2ec16ccSHideki Saito     }
1024f2ec16ccSHideki Saito 
1025f2ec16ccSHideki Saito     // We must be able to predicate all blocks that need to be predicated.
1026f2ec16ccSHideki Saito     if (blockNeedsPredication(BB)) {
10273bbc71d6SSjoerd Meijer       if (!blockCanBePredicated(BB, SafePointers)) {
10289e97caf5SRenato Golin         reportVectorizationFailure(
10299e97caf5SRenato Golin             "Control flow cannot be substituted for a select",
10309e97caf5SRenato Golin             "control flow cannot be substituted for a select",
1031ec818d7fSHideki Saito             "NoCFGForSelect", ORE, TheLoop,
1032ec818d7fSHideki Saito             BB->getTerminator());
1033f2ec16ccSHideki Saito         return false;
1034f2ec16ccSHideki Saito       }
1035f2ec16ccSHideki Saito     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
10369e97caf5SRenato Golin       reportVectorizationFailure(
10379e97caf5SRenato Golin           "Control flow cannot be substituted for a select",
10389e97caf5SRenato Golin           "control flow cannot be substituted for a select",
1039ec818d7fSHideki Saito           "NoCFGForSelect", ORE, TheLoop,
1040ec818d7fSHideki Saito           BB->getTerminator());
1041f2ec16ccSHideki Saito       return false;
1042f2ec16ccSHideki Saito     }
1043f2ec16ccSHideki Saito   }
1044f2ec16ccSHideki Saito 
1045f2ec16ccSHideki Saito   // We can if-convert this loop.
1046f2ec16ccSHideki Saito   return true;
1047f2ec16ccSHideki Saito }
1048f2ec16ccSHideki Saito 
1049f2ec16ccSHideki Saito // Helper function to canVectorizeLoopNestCFG.
1050f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1051f2ec16ccSHideki Saito                                                     bool UseVPlanNativePath) {
1052f2ec16ccSHideki Saito   assert((UseVPlanNativePath || Lp->empty()) &&
1053f2ec16ccSHideki Saito          "VPlan-native path is not enabled.");
1054f2ec16ccSHideki Saito 
1055f2ec16ccSHideki Saito   // TODO: ORE should be improved to show more accurate information when an
1056f2ec16ccSHideki Saito   // outer loop can't be vectorized because a nested loop is not understood or
1057f2ec16ccSHideki Saito   // legal. Something like: "outer_loop_location: loop not vectorized:
1058f2ec16ccSHideki Saito   // (inner_loop_location) loop control flow is not understood by vectorizer".
1059f2ec16ccSHideki Saito 
1060f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1061f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1062f2ec16ccSHideki Saito   bool Result = true;
1063f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1064f2ec16ccSHideki Saito 
1065f2ec16ccSHideki Saito   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1066f2ec16ccSHideki Saito   // be canonicalized.
1067f2ec16ccSHideki Saito   if (!Lp->getLoopPreheader()) {
10689e97caf5SRenato Golin     reportVectorizationFailure("Loop doesn't have a legal pre-header",
10699e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1070ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1071f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1072f2ec16ccSHideki Saito       Result = false;
1073f2ec16ccSHideki Saito     else
1074f2ec16ccSHideki Saito       return false;
1075f2ec16ccSHideki Saito   }
1076f2ec16ccSHideki Saito 
1077f2ec16ccSHideki Saito   // We must have a single backedge.
1078f2ec16ccSHideki Saito   if (Lp->getNumBackEdges() != 1) {
10799e97caf5SRenato Golin     reportVectorizationFailure("The loop must have a single backedge",
10809e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1081ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1082f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1083f2ec16ccSHideki Saito       Result = false;
1084f2ec16ccSHideki Saito     else
1085f2ec16ccSHideki Saito       return false;
1086f2ec16ccSHideki Saito   }
1087f2ec16ccSHideki Saito 
1088f2ec16ccSHideki Saito   // We must have a single exiting block.
1089f2ec16ccSHideki Saito   if (!Lp->getExitingBlock()) {
10909e97caf5SRenato Golin     reportVectorizationFailure("The loop must have an exiting block",
10919e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1092ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1093f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1094f2ec16ccSHideki Saito       Result = false;
1095f2ec16ccSHideki Saito     else
1096f2ec16ccSHideki Saito       return false;
1097f2ec16ccSHideki Saito   }
1098f2ec16ccSHideki Saito 
1099f2ec16ccSHideki Saito   // We only handle bottom-tested loops, i.e. loop in which the condition is
1100f2ec16ccSHideki Saito   // checked at the end of each iteration. With that we can assume that all
1101f2ec16ccSHideki Saito   // instructions in the loop are executed the same number of times.
1102f2ec16ccSHideki Saito   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
11039e97caf5SRenato Golin     reportVectorizationFailure("The exiting block is not the loop latch",
11049e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1105ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1106f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1107f2ec16ccSHideki Saito       Result = false;
1108f2ec16ccSHideki Saito     else
1109f2ec16ccSHideki Saito       return false;
1110f2ec16ccSHideki Saito   }
1111f2ec16ccSHideki Saito 
1112f2ec16ccSHideki Saito   return Result;
1113f2ec16ccSHideki Saito }
1114f2ec16ccSHideki Saito 
1115f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1116f2ec16ccSHideki Saito     Loop *Lp, bool UseVPlanNativePath) {
1117f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1118f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1119f2ec16ccSHideki Saito   bool Result = true;
1120f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1121f2ec16ccSHideki Saito   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1122f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1123f2ec16ccSHideki Saito       Result = false;
1124f2ec16ccSHideki Saito     else
1125f2ec16ccSHideki Saito       return false;
1126f2ec16ccSHideki Saito   }
1127f2ec16ccSHideki Saito 
1128f2ec16ccSHideki Saito   // Recursively check whether the loop control flow of nested loops is
1129f2ec16ccSHideki Saito   // understood.
1130f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
1131f2ec16ccSHideki Saito     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1132f2ec16ccSHideki Saito       if (DoExtraAnalysis)
1133f2ec16ccSHideki Saito         Result = false;
1134f2ec16ccSHideki Saito       else
1135f2ec16ccSHideki Saito         return false;
1136f2ec16ccSHideki Saito     }
1137f2ec16ccSHideki Saito 
1138f2ec16ccSHideki Saito   return Result;
1139f2ec16ccSHideki Saito }
1140f2ec16ccSHideki Saito 
1141f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1142f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1143f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1144f2ec16ccSHideki Saito   bool Result = true;
1145f2ec16ccSHideki Saito 
1146f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1147f2ec16ccSHideki Saito   // Check whether the loop-related control flow in the loop nest is expected by
1148f2ec16ccSHideki Saito   // vectorizer.
1149f2ec16ccSHideki Saito   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1150f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1151f2ec16ccSHideki Saito       Result = false;
1152f2ec16ccSHideki Saito     else
1153f2ec16ccSHideki Saito       return false;
1154f2ec16ccSHideki Saito   }
1155f2ec16ccSHideki Saito 
1156f2ec16ccSHideki Saito   // We need to have a loop header.
1157d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1158f2ec16ccSHideki Saito                     << '\n');
1159f2ec16ccSHideki Saito 
1160f2ec16ccSHideki Saito   // Specific checks for outer loops. We skip the remaining legal checks at this
1161f2ec16ccSHideki Saito   // point because they don't support outer loops.
1162f2ec16ccSHideki Saito   if (!TheLoop->empty()) {
1163f2ec16ccSHideki Saito     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1164f2ec16ccSHideki Saito 
1165f2ec16ccSHideki Saito     if (!canVectorizeOuterLoop()) {
11669e97caf5SRenato Golin       reportVectorizationFailure("Unsupported outer loop",
11679e97caf5SRenato Golin                                  "unsupported outer loop",
1168ec818d7fSHideki Saito                                  "UnsupportedOuterLoop",
1169ec818d7fSHideki Saito                                  ORE, TheLoop);
1170f2ec16ccSHideki Saito       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1171f2ec16ccSHideki Saito       // outer loops.
1172f2ec16ccSHideki Saito       return false;
1173f2ec16ccSHideki Saito     }
1174f2ec16ccSHideki Saito 
1175d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1176f2ec16ccSHideki Saito     return Result;
1177f2ec16ccSHideki Saito   }
1178f2ec16ccSHideki Saito 
1179f2ec16ccSHideki Saito   assert(TheLoop->empty() && "Inner loop expected.");
1180f2ec16ccSHideki Saito   // Check if we can if-convert non-single-bb loops.
1181f2ec16ccSHideki Saito   unsigned NumBlocks = TheLoop->getNumBlocks();
1182f2ec16ccSHideki Saito   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1183d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1184f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1185f2ec16ccSHideki Saito       Result = false;
1186f2ec16ccSHideki Saito     else
1187f2ec16ccSHideki Saito       return false;
1188f2ec16ccSHideki Saito   }
1189f2ec16ccSHideki Saito 
1190f2ec16ccSHideki Saito   // Check if we can vectorize the instructions and CFG in this loop.
1191f2ec16ccSHideki Saito   if (!canVectorizeInstrs()) {
1192d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1193f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1194f2ec16ccSHideki Saito       Result = false;
1195f2ec16ccSHideki Saito     else
1196f2ec16ccSHideki Saito       return false;
1197f2ec16ccSHideki Saito   }
1198f2ec16ccSHideki Saito 
1199f2ec16ccSHideki Saito   // Go over each instruction and look at memory deps.
1200f2ec16ccSHideki Saito   if (!canVectorizeMemory()) {
1201d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1202f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1203f2ec16ccSHideki Saito       Result = false;
1204f2ec16ccSHideki Saito     else
1205f2ec16ccSHideki Saito       return false;
1206f2ec16ccSHideki Saito   }
1207f2ec16ccSHideki Saito 
1208d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1209f2ec16ccSHideki Saito                     << (LAI->getRuntimePointerChecking()->Need
1210f2ec16ccSHideki Saito                             ? " (with a runtime bound check)"
1211f2ec16ccSHideki Saito                             : "")
1212f2ec16ccSHideki Saito                     << "!\n");
1213f2ec16ccSHideki Saito 
1214f2ec16ccSHideki Saito   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1215f2ec16ccSHideki Saito   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1216f2ec16ccSHideki Saito     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1217f2ec16ccSHideki Saito 
1218f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
12199e97caf5SRenato Golin     reportVectorizationFailure("Too many SCEV checks needed",
12209e97caf5SRenato Golin         "Too many SCEV assumptions need to be made and checked at runtime",
1221ec818d7fSHideki Saito         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1222f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1223f2ec16ccSHideki Saito       Result = false;
1224f2ec16ccSHideki Saito     else
1225f2ec16ccSHideki Saito       return false;
1226f2ec16ccSHideki Saito   }
1227f2ec16ccSHideki Saito 
1228f2ec16ccSHideki Saito   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1229f2ec16ccSHideki Saito   // we can vectorize, and at this point we don't have any other mem analysis
1230f2ec16ccSHideki Saito   // which may limit our maximum vectorization factor, so just return true with
1231f2ec16ccSHideki Saito   // no restrictions.
1232f2ec16ccSHideki Saito   return Result;
1233f2ec16ccSHideki Saito }
1234f2ec16ccSHideki Saito 
1235d57d73daSDorit Nuzman bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1236b0b5312eSAyal Zaks 
1237b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1238b0b5312eSAyal Zaks 
1239d15df0edSAyal Zaks   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1240b0b5312eSAyal Zaks 
1241d0d38df0SDavid Green   for (auto &Reduction : getReductionVars())
1242d15df0edSAyal Zaks     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1243d15df0edSAyal Zaks 
1244d15df0edSAyal Zaks   // TODO: handle non-reduction outside users when tail is folded by masking.
1245b0b5312eSAyal Zaks   for (auto *AE : AllowedExit) {
1246d15df0edSAyal Zaks     // Check that all users of allowed exit values are inside the loop or
1247d15df0edSAyal Zaks     // are the live-out of a reduction.
1248d15df0edSAyal Zaks     if (ReductionLiveOuts.count(AE))
1249d15df0edSAyal Zaks       continue;
1250b0b5312eSAyal Zaks     for (User *U : AE->users()) {
1251b0b5312eSAyal Zaks       Instruction *UI = cast<Instruction>(U);
1252b0b5312eSAyal Zaks       if (TheLoop->contains(UI))
1253b0b5312eSAyal Zaks         continue;
12549e97caf5SRenato Golin       reportVectorizationFailure(
12559e97caf5SRenato Golin           "Cannot fold tail by masking, loop has an outside user for",
12569e97caf5SRenato Golin           "Cannot fold tail by masking in the presence of live outs.",
1257ec818d7fSHideki Saito           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1258b0b5312eSAyal Zaks       return false;
1259b0b5312eSAyal Zaks     }
1260b0b5312eSAyal Zaks   }
1261b0b5312eSAyal Zaks 
1262b0b5312eSAyal Zaks   // The list of pointers that we can safely read and write to remains empty.
1263b0b5312eSAyal Zaks   SmallPtrSet<Value *, 8> SafePointers;
1264b0b5312eSAyal Zaks 
1265b0b5312eSAyal Zaks   // Check and mark all blocks for predication, including those that ordinarily
1266b0b5312eSAyal Zaks   // do not need predication such as the header block.
1267b0b5312eSAyal Zaks   for (BasicBlock *BB : TheLoop->blocks()) {
1268d57d73daSDorit Nuzman     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
12699e97caf5SRenato Golin       reportVectorizationFailure(
12709e97caf5SRenato Golin           "Cannot fold tail by masking as required",
12719e97caf5SRenato Golin           "control flow cannot be substituted for a select",
1272ec818d7fSHideki Saito           "NoCFGForSelect", ORE, TheLoop,
1273ec818d7fSHideki Saito           BB->getTerminator());
1274b0b5312eSAyal Zaks       return false;
1275b0b5312eSAyal Zaks     }
1276b0b5312eSAyal Zaks   }
1277b0b5312eSAyal Zaks 
1278b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1279b0b5312eSAyal Zaks   return true;
1280b0b5312eSAyal Zaks }
1281b0b5312eSAyal Zaks 
1282f2ec16ccSHideki Saito } // namespace llvm
1283