1f2ec16ccSHideki Saito //===- LoopVectorizationLegality.cpp --------------------------------------===//
2f2ec16ccSHideki Saito //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6f2ec16ccSHideki Saito //
7f2ec16ccSHideki Saito //===----------------------------------------------------------------------===//
8f2ec16ccSHideki Saito //
9f2ec16ccSHideki Saito // This file provides loop vectorization legality analysis. Original code
10f2ec16ccSHideki Saito // resided in LoopVectorize.cpp for a long time.
11f2ec16ccSHideki Saito //
12f2ec16ccSHideki Saito // At this point, it is implemented as a utility class, not as an analysis
13f2ec16ccSHideki Saito // pass. It should be easy to create an analysis pass around it if there
14f2ec16ccSHideki Saito // is a need (but D45420 needs to happen first).
15f2ec16ccSHideki Saito //
16*cc529285SSimon Pilgrim 
17f2ec16ccSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
187403569bSPhilip Reames #include "llvm/Analysis/Loads.h"
19a5f1f9c9SSimon Pilgrim #include "llvm/Analysis/LoopInfo.h"
20*cc529285SSimon Pilgrim #include "llvm/Analysis/TargetLibraryInfo.h"
217403569bSPhilip Reames #include "llvm/Analysis/ValueTracking.h"
22f2ec16ccSHideki Saito #include "llvm/Analysis/VectorUtils.h"
23f2ec16ccSHideki Saito #include "llvm/IR/IntrinsicInst.h"
2423c11380SFlorian Hahn #include "llvm/IR/PatternMatch.h"
257bedae7dSHiroshi Yamauchi #include "llvm/Transforms/Utils/SizeOpts.h"
2623c11380SFlorian Hahn #include "llvm/Transforms/Vectorize/LoopVectorize.h"
27f2ec16ccSHideki Saito 
28f2ec16ccSHideki Saito using namespace llvm;
2923c11380SFlorian Hahn using namespace PatternMatch;
30f2ec16ccSHideki Saito 
31f2ec16ccSHideki Saito #define LV_NAME "loop-vectorize"
32f2ec16ccSHideki Saito #define DEBUG_TYPE LV_NAME
33f2ec16ccSHideki Saito 
344e4ecae0SHideki Saito extern cl::opt<bool> EnableVPlanPredication;
354e4ecae0SHideki Saito 
36f2ec16ccSHideki Saito static cl::opt<bool>
37f2ec16ccSHideki Saito     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
38f2ec16ccSHideki Saito                        cl::desc("Enable if-conversion during vectorization."));
39f2ec16ccSHideki Saito 
40f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
41f2ec16ccSHideki Saito     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
42f2ec16ccSHideki Saito     cl::desc("The maximum allowed number of runtime memory checks with a "
43f2ec16ccSHideki Saito              "vectorize(enable) pragma."));
44f2ec16ccSHideki Saito 
45f2ec16ccSHideki Saito static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
46f2ec16ccSHideki Saito     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
47f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed."));
48f2ec16ccSHideki Saito 
49f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
50f2ec16ccSHideki Saito     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
51f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed with a "
52f2ec16ccSHideki Saito              "vectorize(enable) pragma"));
53f2ec16ccSHideki Saito 
54f2ec16ccSHideki Saito /// Maximum vectorization interleave count.
55f2ec16ccSHideki Saito static const unsigned MaxInterleaveFactor = 16;
56f2ec16ccSHideki Saito 
57f2ec16ccSHideki Saito namespace llvm {
58f2ec16ccSHideki Saito 
59f2ec16ccSHideki Saito bool LoopVectorizeHints::Hint::validate(unsigned Val) {
60f2ec16ccSHideki Saito   switch (Kind) {
61f2ec16ccSHideki Saito   case HK_WIDTH:
62f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
63f2ec16ccSHideki Saito   case HK_UNROLL:
64f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
65f2ec16ccSHideki Saito   case HK_FORCE:
66f2ec16ccSHideki Saito     return (Val <= 1);
67f2ec16ccSHideki Saito   case HK_ISVECTORIZED:
6820b198ecSSjoerd Meijer   case HK_PREDICATE:
69f2ec16ccSHideki Saito     return (Val == 0 || Val == 1);
70f2ec16ccSHideki Saito   }
71f2ec16ccSHideki Saito   return false;
72f2ec16ccSHideki Saito }
73f2ec16ccSHideki Saito 
74d4eb13c8SMichael Kruse LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
75d4eb13c8SMichael Kruse                                        bool InterleaveOnlyWhenForced,
76f2ec16ccSHideki Saito                                        OptimizationRemarkEmitter &ORE)
77f2ec16ccSHideki Saito     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
78d4eb13c8SMichael Kruse       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
79f2ec16ccSHideki Saito       Force("vectorize.enable", FK_Undefined, HK_FORCE),
8020b198ecSSjoerd Meijer       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
81cb47b878SSjoerd Meijer       Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L),
8220b198ecSSjoerd Meijer       ORE(ORE) {
83f2ec16ccSHideki Saito   // Populate values with existing loop metadata.
84f2ec16ccSHideki Saito   getHintsFromMetadata();
85f2ec16ccSHideki Saito 
86f2ec16ccSHideki Saito   // force-vector-interleave overrides DisableInterleaving.
87f2ec16ccSHideki Saito   if (VectorizerParams::isInterleaveForced())
88f2ec16ccSHideki Saito     Interleave.Value = VectorizerParams::VectorizationInterleave;
89f2ec16ccSHideki Saito 
90f2ec16ccSHideki Saito   if (IsVectorized.Value != 1)
91f2ec16ccSHideki Saito     // If the vectorization width and interleaving count are both 1 then
92f2ec16ccSHideki Saito     // consider the loop to have been already vectorized because there's
93f2ec16ccSHideki Saito     // nothing more that we can do.
94f2ec16ccSHideki Saito     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
95d4eb13c8SMichael Kruse   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
96f2ec16ccSHideki Saito              << "LV: Interleaving disabled by the pass manager\n");
97f2ec16ccSHideki Saito }
98f2ec16ccSHideki Saito 
9977a614a6SMichael Kruse void LoopVectorizeHints::setAlreadyVectorized() {
10077a614a6SMichael Kruse   LLVMContext &Context = TheLoop->getHeader()->getContext();
10177a614a6SMichael Kruse 
10277a614a6SMichael Kruse   MDNode *IsVectorizedMD = MDNode::get(
10377a614a6SMichael Kruse       Context,
10477a614a6SMichael Kruse       {MDString::get(Context, "llvm.loop.isvectorized"),
10577a614a6SMichael Kruse        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
10677a614a6SMichael Kruse   MDNode *LoopID = TheLoop->getLoopID();
10777a614a6SMichael Kruse   MDNode *NewLoopID =
10877a614a6SMichael Kruse       makePostTransformationMetadata(Context, LoopID,
10977a614a6SMichael Kruse                                      {Twine(Prefix(), "vectorize.").str(),
11077a614a6SMichael Kruse                                       Twine(Prefix(), "interleave.").str()},
11177a614a6SMichael Kruse                                      {IsVectorizedMD});
11277a614a6SMichael Kruse   TheLoop->setLoopID(NewLoopID);
11377a614a6SMichael Kruse 
11477a614a6SMichael Kruse   // Update internal cache.
11577a614a6SMichael Kruse   IsVectorized.Value = 1;
11677a614a6SMichael Kruse }
11777a614a6SMichael Kruse 
118d4eb13c8SMichael Kruse bool LoopVectorizeHints::allowVectorization(
119d4eb13c8SMichael Kruse     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
120f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled) {
121d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
122f2ec16ccSHideki Saito     emitRemarkWithHints();
123f2ec16ccSHideki Saito     return false;
124f2ec16ccSHideki Saito   }
125f2ec16ccSHideki Saito 
126d4eb13c8SMichael Kruse   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
127d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
128f2ec16ccSHideki Saito     emitRemarkWithHints();
129f2ec16ccSHideki Saito     return false;
130f2ec16ccSHideki Saito   }
131f2ec16ccSHideki Saito 
132f2ec16ccSHideki Saito   if (getIsVectorized() == 1) {
133d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
134f2ec16ccSHideki Saito     // FIXME: Add interleave.disable metadata. This will allow
135f2ec16ccSHideki Saito     // vectorize.disable to be used without disabling the pass and errors
136f2ec16ccSHideki Saito     // to differentiate between disabled vectorization and a width of 1.
137f2ec16ccSHideki Saito     ORE.emit([&]() {
138f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
139f2ec16ccSHideki Saito                                         "AllDisabled", L->getStartLoc(),
140f2ec16ccSHideki Saito                                         L->getHeader())
141f2ec16ccSHideki Saito              << "loop not vectorized: vectorization and interleaving are "
142f2ec16ccSHideki Saito                 "explicitly disabled, or the loop has already been "
143f2ec16ccSHideki Saito                 "vectorized";
144f2ec16ccSHideki Saito     });
145f2ec16ccSHideki Saito     return false;
146f2ec16ccSHideki Saito   }
147f2ec16ccSHideki Saito 
148f2ec16ccSHideki Saito   return true;
149f2ec16ccSHideki Saito }
150f2ec16ccSHideki Saito 
151f2ec16ccSHideki Saito void LoopVectorizeHints::emitRemarkWithHints() const {
152f2ec16ccSHideki Saito   using namespace ore;
153f2ec16ccSHideki Saito 
154f2ec16ccSHideki Saito   ORE.emit([&]() {
155f2ec16ccSHideki Saito     if (Force.Value == LoopVectorizeHints::FK_Disabled)
156f2ec16ccSHideki Saito       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
157f2ec16ccSHideki Saito                                       TheLoop->getStartLoc(),
158f2ec16ccSHideki Saito                                       TheLoop->getHeader())
159f2ec16ccSHideki Saito              << "loop not vectorized: vectorization is explicitly disabled";
160f2ec16ccSHideki Saito     else {
161f2ec16ccSHideki Saito       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
162f2ec16ccSHideki Saito                                  TheLoop->getStartLoc(), TheLoop->getHeader());
163f2ec16ccSHideki Saito       R << "loop not vectorized";
164f2ec16ccSHideki Saito       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
165f2ec16ccSHideki Saito         R << " (Force=" << NV("Force", true);
166f2ec16ccSHideki Saito         if (Width.Value != 0)
167f2ec16ccSHideki Saito           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
168f2ec16ccSHideki Saito         if (Interleave.Value != 0)
169f2ec16ccSHideki Saito           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
170f2ec16ccSHideki Saito         R << ")";
171f2ec16ccSHideki Saito       }
172f2ec16ccSHideki Saito       return R;
173f2ec16ccSHideki Saito     }
174f2ec16ccSHideki Saito   });
175f2ec16ccSHideki Saito }
176f2ec16ccSHideki Saito 
177f2ec16ccSHideki Saito const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
178f2ec16ccSHideki Saito   if (getWidth() == 1)
179f2ec16ccSHideki Saito     return LV_NAME;
180f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled)
181f2ec16ccSHideki Saito     return LV_NAME;
182f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
183f2ec16ccSHideki Saito     return LV_NAME;
184f2ec16ccSHideki Saito   return OptimizationRemarkAnalysis::AlwaysPrint;
185f2ec16ccSHideki Saito }
186f2ec16ccSHideki Saito 
187f2ec16ccSHideki Saito void LoopVectorizeHints::getHintsFromMetadata() {
188f2ec16ccSHideki Saito   MDNode *LoopID = TheLoop->getLoopID();
189f2ec16ccSHideki Saito   if (!LoopID)
190f2ec16ccSHideki Saito     return;
191f2ec16ccSHideki Saito 
192f2ec16ccSHideki Saito   // First operand should refer to the loop id itself.
193f2ec16ccSHideki Saito   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
194f2ec16ccSHideki Saito   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
195f2ec16ccSHideki Saito 
196f2ec16ccSHideki Saito   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
197f2ec16ccSHideki Saito     const MDString *S = nullptr;
198f2ec16ccSHideki Saito     SmallVector<Metadata *, 4> Args;
199f2ec16ccSHideki Saito 
200f2ec16ccSHideki Saito     // The expected hint is either a MDString or a MDNode with the first
201f2ec16ccSHideki Saito     // operand a MDString.
202f2ec16ccSHideki Saito     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
203f2ec16ccSHideki Saito       if (!MD || MD->getNumOperands() == 0)
204f2ec16ccSHideki Saito         continue;
205f2ec16ccSHideki Saito       S = dyn_cast<MDString>(MD->getOperand(0));
206f2ec16ccSHideki Saito       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
207f2ec16ccSHideki Saito         Args.push_back(MD->getOperand(i));
208f2ec16ccSHideki Saito     } else {
209f2ec16ccSHideki Saito       S = dyn_cast<MDString>(LoopID->getOperand(i));
210f2ec16ccSHideki Saito       assert(Args.size() == 0 && "too many arguments for MDString");
211f2ec16ccSHideki Saito     }
212f2ec16ccSHideki Saito 
213f2ec16ccSHideki Saito     if (!S)
214f2ec16ccSHideki Saito       continue;
215f2ec16ccSHideki Saito 
216f2ec16ccSHideki Saito     // Check if the hint starts with the loop metadata prefix.
217f2ec16ccSHideki Saito     StringRef Name = S->getString();
218f2ec16ccSHideki Saito     if (Args.size() == 1)
219f2ec16ccSHideki Saito       setHint(Name, Args[0]);
220f2ec16ccSHideki Saito   }
221f2ec16ccSHideki Saito }
222f2ec16ccSHideki Saito 
223f2ec16ccSHideki Saito void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
224f2ec16ccSHideki Saito   if (!Name.startswith(Prefix()))
225f2ec16ccSHideki Saito     return;
226f2ec16ccSHideki Saito   Name = Name.substr(Prefix().size(), StringRef::npos);
227f2ec16ccSHideki Saito 
228f2ec16ccSHideki Saito   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
229f2ec16ccSHideki Saito   if (!C)
230f2ec16ccSHideki Saito     return;
231f2ec16ccSHideki Saito   unsigned Val = C->getZExtValue();
232f2ec16ccSHideki Saito 
23320b198ecSSjoerd Meijer   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
234f2ec16ccSHideki Saito   for (auto H : Hints) {
235f2ec16ccSHideki Saito     if (Name == H->Name) {
236f2ec16ccSHideki Saito       if (H->validate(Val))
237f2ec16ccSHideki Saito         H->Value = Val;
238f2ec16ccSHideki Saito       else
239d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
240f2ec16ccSHideki Saito       break;
241f2ec16ccSHideki Saito     }
242f2ec16ccSHideki Saito   }
243f2ec16ccSHideki Saito }
244f2ec16ccSHideki Saito 
245f2ec16ccSHideki Saito bool LoopVectorizationRequirements::doesNotMeet(
246f2ec16ccSHideki Saito     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
247f2ec16ccSHideki Saito   const char *PassName = Hints.vectorizeAnalysisPassName();
248f2ec16ccSHideki Saito   bool Failed = false;
249f2ec16ccSHideki Saito   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
250f2ec16ccSHideki Saito     ORE.emit([&]() {
251f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisFPCommute(
252f2ec16ccSHideki Saito                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
253f2ec16ccSHideki Saito                  UnsafeAlgebraInst->getParent())
254f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
255f2ec16ccSHideki Saito                 "floating-point operations";
256f2ec16ccSHideki Saito     });
257f2ec16ccSHideki Saito     Failed = true;
258f2ec16ccSHideki Saito   }
259f2ec16ccSHideki Saito 
260f2ec16ccSHideki Saito   // Test if runtime memcheck thresholds are exceeded.
261f2ec16ccSHideki Saito   bool PragmaThresholdReached =
262f2ec16ccSHideki Saito       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
263f2ec16ccSHideki Saito   bool ThresholdReached =
264f2ec16ccSHideki Saito       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
265f2ec16ccSHideki Saito   if ((ThresholdReached && !Hints.allowReordering()) ||
266f2ec16ccSHideki Saito       PragmaThresholdReached) {
267f2ec16ccSHideki Saito     ORE.emit([&]() {
268f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
269f2ec16ccSHideki Saito                                                 L->getStartLoc(),
270f2ec16ccSHideki Saito                                                 L->getHeader())
271f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
272f2ec16ccSHideki Saito                 "memory operations";
273f2ec16ccSHideki Saito     });
274d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
275f2ec16ccSHideki Saito     Failed = true;
276f2ec16ccSHideki Saito   }
277f2ec16ccSHideki Saito 
278f2ec16ccSHideki Saito   return Failed;
279f2ec16ccSHideki Saito }
280f2ec16ccSHideki Saito 
281f2ec16ccSHideki Saito // Return true if the inner loop \p Lp is uniform with regard to the outer loop
282f2ec16ccSHideki Saito // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
283f2ec16ccSHideki Saito // executing the inner loop will execute the same iterations). This check is
284f2ec16ccSHideki Saito // very constrained for now but it will be relaxed in the future. \p Lp is
285f2ec16ccSHideki Saito // considered uniform if it meets all the following conditions:
286f2ec16ccSHideki Saito //   1) it has a canonical IV (starting from 0 and with stride 1),
287f2ec16ccSHideki Saito //   2) its latch terminator is a conditional branch and,
288f2ec16ccSHideki Saito //   3) its latch condition is a compare instruction whose operands are the
289f2ec16ccSHideki Saito //      canonical IV and an OuterLp invariant.
290f2ec16ccSHideki Saito // This check doesn't take into account the uniformity of other conditions not
291f2ec16ccSHideki Saito // related to the loop latch because they don't affect the loop uniformity.
292f2ec16ccSHideki Saito //
293f2ec16ccSHideki Saito // NOTE: We decided to keep all these checks and its associated documentation
294f2ec16ccSHideki Saito // together so that we can easily have a picture of the current supported loop
295f2ec16ccSHideki Saito // nests. However, some of the current checks don't depend on \p OuterLp and
296f2ec16ccSHideki Saito // would be redundantly executed for each \p Lp if we invoked this function for
297f2ec16ccSHideki Saito // different candidate outer loops. This is not the case for now because we
298f2ec16ccSHideki Saito // don't currently have the infrastructure to evaluate multiple candidate outer
299f2ec16ccSHideki Saito // loops and \p OuterLp will be a fixed parameter while we only support explicit
300f2ec16ccSHideki Saito // outer loop vectorization. It's also very likely that these checks go away
301f2ec16ccSHideki Saito // before introducing the aforementioned infrastructure. However, if this is not
302f2ec16ccSHideki Saito // the case, we should move the \p OuterLp independent checks to a separate
303f2ec16ccSHideki Saito // function that is only executed once for each \p Lp.
304f2ec16ccSHideki Saito static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
305f2ec16ccSHideki Saito   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
306f2ec16ccSHideki Saito 
307f2ec16ccSHideki Saito   // If Lp is the outer loop, it's uniform by definition.
308f2ec16ccSHideki Saito   if (Lp == OuterLp)
309f2ec16ccSHideki Saito     return true;
310f2ec16ccSHideki Saito   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
311f2ec16ccSHideki Saito 
312f2ec16ccSHideki Saito   // 1.
313f2ec16ccSHideki Saito   PHINode *IV = Lp->getCanonicalInductionVariable();
314f2ec16ccSHideki Saito   if (!IV) {
315d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
316f2ec16ccSHideki Saito     return false;
317f2ec16ccSHideki Saito   }
318f2ec16ccSHideki Saito 
319f2ec16ccSHideki Saito   // 2.
320f2ec16ccSHideki Saito   BasicBlock *Latch = Lp->getLoopLatch();
321f2ec16ccSHideki Saito   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
322f2ec16ccSHideki Saito   if (!LatchBr || LatchBr->isUnconditional()) {
323d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
324f2ec16ccSHideki Saito     return false;
325f2ec16ccSHideki Saito   }
326f2ec16ccSHideki Saito 
327f2ec16ccSHideki Saito   // 3.
328f2ec16ccSHideki Saito   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
329f2ec16ccSHideki Saito   if (!LatchCmp) {
330d34e60caSNicola Zaghen     LLVM_DEBUG(
331d34e60caSNicola Zaghen         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
332f2ec16ccSHideki Saito     return false;
333f2ec16ccSHideki Saito   }
334f2ec16ccSHideki Saito 
335f2ec16ccSHideki Saito   Value *CondOp0 = LatchCmp->getOperand(0);
336f2ec16ccSHideki Saito   Value *CondOp1 = LatchCmp->getOperand(1);
337f2ec16ccSHideki Saito   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
338f2ec16ccSHideki Saito   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
339f2ec16ccSHideki Saito       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
340d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
341f2ec16ccSHideki Saito     return false;
342f2ec16ccSHideki Saito   }
343f2ec16ccSHideki Saito 
344f2ec16ccSHideki Saito   return true;
345f2ec16ccSHideki Saito }
346f2ec16ccSHideki Saito 
347f2ec16ccSHideki Saito // Return true if \p Lp and all its nested loops are uniform with regard to \p
348f2ec16ccSHideki Saito // OuterLp.
349f2ec16ccSHideki Saito static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
350f2ec16ccSHideki Saito   if (!isUniformLoop(Lp, OuterLp))
351f2ec16ccSHideki Saito     return false;
352f2ec16ccSHideki Saito 
353f2ec16ccSHideki Saito   // Check if nested loops are uniform.
354f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
355f2ec16ccSHideki Saito     if (!isUniformLoopNest(SubLp, OuterLp))
356f2ec16ccSHideki Saito       return false;
357f2ec16ccSHideki Saito 
358f2ec16ccSHideki Saito   return true;
359f2ec16ccSHideki Saito }
360f2ec16ccSHideki Saito 
3615f8f34e4SAdrian Prantl /// Check whether it is safe to if-convert this phi node.
362f2ec16ccSHideki Saito ///
363f2ec16ccSHideki Saito /// Phi nodes with constant expressions that can trap are not safe to if
364f2ec16ccSHideki Saito /// convert.
365f2ec16ccSHideki Saito static bool canIfConvertPHINodes(BasicBlock *BB) {
366f2ec16ccSHideki Saito   for (PHINode &Phi : BB->phis()) {
367f2ec16ccSHideki Saito     for (Value *V : Phi.incoming_values())
368f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(V))
369f2ec16ccSHideki Saito         if (C->canTrap())
370f2ec16ccSHideki Saito           return false;
371f2ec16ccSHideki Saito   }
372f2ec16ccSHideki Saito   return true;
373f2ec16ccSHideki Saito }
374f2ec16ccSHideki Saito 
375f2ec16ccSHideki Saito static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
376f2ec16ccSHideki Saito   if (Ty->isPointerTy())
377f2ec16ccSHideki Saito     return DL.getIntPtrType(Ty);
378f2ec16ccSHideki Saito 
379f2ec16ccSHideki Saito   // It is possible that char's or short's overflow when we ask for the loop's
380f2ec16ccSHideki Saito   // trip count, work around this by changing the type size.
381f2ec16ccSHideki Saito   if (Ty->getScalarSizeInBits() < 32)
382f2ec16ccSHideki Saito     return Type::getInt32Ty(Ty->getContext());
383f2ec16ccSHideki Saito 
384f2ec16ccSHideki Saito   return Ty;
385f2ec16ccSHideki Saito }
386f2ec16ccSHideki Saito 
387f2ec16ccSHideki Saito static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
388f2ec16ccSHideki Saito   Ty0 = convertPointerToIntegerType(DL, Ty0);
389f2ec16ccSHideki Saito   Ty1 = convertPointerToIntegerType(DL, Ty1);
390f2ec16ccSHideki Saito   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
391f2ec16ccSHideki Saito     return Ty0;
392f2ec16ccSHideki Saito   return Ty1;
393f2ec16ccSHideki Saito }
394f2ec16ccSHideki Saito 
3955f8f34e4SAdrian Prantl /// Check that the instruction has outside loop users and is not an
396f2ec16ccSHideki Saito /// identified reduction variable.
397f2ec16ccSHideki Saito static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
398f2ec16ccSHideki Saito                                SmallPtrSetImpl<Value *> &AllowedExit) {
39960a1e4ddSAnna Thomas   // Reductions, Inductions and non-header phis are allowed to have exit users. All
400f2ec16ccSHideki Saito   // other instructions must not have external users.
401f2ec16ccSHideki Saito   if (!AllowedExit.count(Inst))
402f2ec16ccSHideki Saito     // Check that all of the users of the loop are inside the BB.
403f2ec16ccSHideki Saito     for (User *U : Inst->users()) {
404f2ec16ccSHideki Saito       Instruction *UI = cast<Instruction>(U);
405f2ec16ccSHideki Saito       // This user may be a reduction exit value.
406f2ec16ccSHideki Saito       if (!TheLoop->contains(UI)) {
407d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
408f2ec16ccSHideki Saito         return true;
409f2ec16ccSHideki Saito       }
410f2ec16ccSHideki Saito     }
411f2ec16ccSHideki Saito   return false;
412f2ec16ccSHideki Saito }
413f2ec16ccSHideki Saito 
414f2ec16ccSHideki Saito int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
415f2ec16ccSHideki Saito   const ValueToValueMap &Strides =
416f2ec16ccSHideki Saito       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
417f2ec16ccSHideki Saito 
4187bedae7dSHiroshi Yamauchi   Function *F = TheLoop->getHeader()->getParent();
4197bedae7dSHiroshi Yamauchi   bool OptForSize = F->hasOptSize() ||
4207bedae7dSHiroshi Yamauchi                     llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
4217bedae7dSHiroshi Yamauchi                                                 PGSOQueryType::IRPass);
4227bedae7dSHiroshi Yamauchi   bool CanAddPredicate = !OptForSize;
423d1170dbeSSjoerd Meijer   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
424f2ec16ccSHideki Saito   if (Stride == 1 || Stride == -1)
425f2ec16ccSHideki Saito     return Stride;
426f2ec16ccSHideki Saito   return 0;
427f2ec16ccSHideki Saito }
428f2ec16ccSHideki Saito 
429f2ec16ccSHideki Saito bool LoopVectorizationLegality::isUniform(Value *V) {
430f2ec16ccSHideki Saito   return LAI->isUniform(V);
431f2ec16ccSHideki Saito }
432f2ec16ccSHideki Saito 
433f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeOuterLoop() {
434f2ec16ccSHideki Saito   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
435f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
436f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
437f2ec16ccSHideki Saito   bool Result = true;
438f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
439f2ec16ccSHideki Saito 
440f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
441f2ec16ccSHideki Saito     // Check whether the BB terminator is a BranchInst. Any other terminator is
442f2ec16ccSHideki Saito     // not supported yet.
443f2ec16ccSHideki Saito     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
444f2ec16ccSHideki Saito     if (!Br) {
4459e97caf5SRenato Golin       reportVectorizationFailure("Unsupported basic block terminator",
4469e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
447ec818d7fSHideki Saito           "CFGNotUnderstood", ORE, TheLoop);
448f2ec16ccSHideki Saito       if (DoExtraAnalysis)
449f2ec16ccSHideki Saito         Result = false;
450f2ec16ccSHideki Saito       else
451f2ec16ccSHideki Saito         return false;
452f2ec16ccSHideki Saito     }
453f2ec16ccSHideki Saito 
454f2ec16ccSHideki Saito     // Check whether the BranchInst is a supported one. Only unconditional
455f2ec16ccSHideki Saito     // branches, conditional branches with an outer loop invariant condition or
456f2ec16ccSHideki Saito     // backedges are supported.
4574e4ecae0SHideki Saito     // FIXME: We skip these checks when VPlan predication is enabled as we
4584e4ecae0SHideki Saito     // want to allow divergent branches. This whole check will be removed
4594e4ecae0SHideki Saito     // once VPlan predication is on by default.
4604e4ecae0SHideki Saito     if (!EnableVPlanPredication && Br && Br->isConditional() &&
461f2ec16ccSHideki Saito         !TheLoop->isLoopInvariant(Br->getCondition()) &&
462f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(0)) &&
463f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(1))) {
4649e97caf5SRenato Golin       reportVectorizationFailure("Unsupported conditional branch",
4659e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
466ec818d7fSHideki Saito           "CFGNotUnderstood", ORE, TheLoop);
467f2ec16ccSHideki Saito       if (DoExtraAnalysis)
468f2ec16ccSHideki Saito         Result = false;
469f2ec16ccSHideki Saito       else
470f2ec16ccSHideki Saito         return false;
471f2ec16ccSHideki Saito     }
472f2ec16ccSHideki Saito   }
473f2ec16ccSHideki Saito 
474f2ec16ccSHideki Saito   // Check whether inner loops are uniform. At this point, we only support
475f2ec16ccSHideki Saito   // simple outer loops scenarios with uniform nested loops.
476f2ec16ccSHideki Saito   if (!isUniformLoopNest(TheLoop /*loop nest*/,
477f2ec16ccSHideki Saito                          TheLoop /*context outer loop*/)) {
4789e97caf5SRenato Golin     reportVectorizationFailure("Outer loop contains divergent loops",
4799e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
480ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
481f2ec16ccSHideki Saito     if (DoExtraAnalysis)
482f2ec16ccSHideki Saito       Result = false;
483f2ec16ccSHideki Saito     else
484f2ec16ccSHideki Saito       return false;
485f2ec16ccSHideki Saito   }
486f2ec16ccSHideki Saito 
487ea7f3035SHideki Saito   // Check whether we are able to set up outer loop induction.
488ea7f3035SHideki Saito   if (!setupOuterLoopInductions()) {
4899e97caf5SRenato Golin     reportVectorizationFailure("Unsupported outer loop Phi(s)",
4909e97caf5SRenato Golin                                "Unsupported outer loop Phi(s)",
491ec818d7fSHideki Saito                                "UnsupportedPhi", ORE, TheLoop);
492ea7f3035SHideki Saito     if (DoExtraAnalysis)
493ea7f3035SHideki Saito       Result = false;
494ea7f3035SHideki Saito     else
495ea7f3035SHideki Saito       return false;
496ea7f3035SHideki Saito   }
497ea7f3035SHideki Saito 
498f2ec16ccSHideki Saito   return Result;
499f2ec16ccSHideki Saito }
500f2ec16ccSHideki Saito 
501f2ec16ccSHideki Saito void LoopVectorizationLegality::addInductionPhi(
502f2ec16ccSHideki Saito     PHINode *Phi, const InductionDescriptor &ID,
503f2ec16ccSHideki Saito     SmallPtrSetImpl<Value *> &AllowedExit) {
504f2ec16ccSHideki Saito   Inductions[Phi] = ID;
505f2ec16ccSHideki Saito 
506f2ec16ccSHideki Saito   // In case this induction also comes with casts that we know we can ignore
507f2ec16ccSHideki Saito   // in the vectorized loop body, record them here. All casts could be recorded
508f2ec16ccSHideki Saito   // here for ignoring, but suffices to record only the first (as it is the
509f2ec16ccSHideki Saito   // only one that may bw used outside the cast sequence).
510f2ec16ccSHideki Saito   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
511f2ec16ccSHideki Saito   if (!Casts.empty())
512f2ec16ccSHideki Saito     InductionCastsToIgnore.insert(*Casts.begin());
513f2ec16ccSHideki Saito 
514f2ec16ccSHideki Saito   Type *PhiTy = Phi->getType();
515f2ec16ccSHideki Saito   const DataLayout &DL = Phi->getModule()->getDataLayout();
516f2ec16ccSHideki Saito 
517f2ec16ccSHideki Saito   // Get the widest type.
518f2ec16ccSHideki Saito   if (!PhiTy->isFloatingPointTy()) {
519f2ec16ccSHideki Saito     if (!WidestIndTy)
520f2ec16ccSHideki Saito       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
521f2ec16ccSHideki Saito     else
522f2ec16ccSHideki Saito       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
523f2ec16ccSHideki Saito   }
524f2ec16ccSHideki Saito 
525f2ec16ccSHideki Saito   // Int inductions are special because we only allow one IV.
526f2ec16ccSHideki Saito   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
527f2ec16ccSHideki Saito       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
528f2ec16ccSHideki Saito       isa<Constant>(ID.getStartValue()) &&
529f2ec16ccSHideki Saito       cast<Constant>(ID.getStartValue())->isNullValue()) {
530f2ec16ccSHideki Saito 
531f2ec16ccSHideki Saito     // Use the phi node with the widest type as induction. Use the last
532f2ec16ccSHideki Saito     // one if there are multiple (no good reason for doing this other
533f2ec16ccSHideki Saito     // than it is expedient). We've checked that it begins at zero and
534f2ec16ccSHideki Saito     // steps by one, so this is a canonical induction variable.
535f2ec16ccSHideki Saito     if (!PrimaryInduction || PhiTy == WidestIndTy)
536f2ec16ccSHideki Saito       PrimaryInduction = Phi;
537f2ec16ccSHideki Saito   }
538f2ec16ccSHideki Saito 
539f2ec16ccSHideki Saito   // Both the PHI node itself, and the "post-increment" value feeding
540f2ec16ccSHideki Saito   // back into the PHI node may have external users.
541f2ec16ccSHideki Saito   // We can allow those uses, except if the SCEVs we have for them rely
542f2ec16ccSHideki Saito   // on predicates that only hold within the loop, since allowing the exit
5436a1dd77fSAnna Thomas   // currently means re-using this SCEV outside the loop (see PR33706 for more
5446a1dd77fSAnna Thomas   // details).
545f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().isAlwaysTrue()) {
546f2ec16ccSHideki Saito     AllowedExit.insert(Phi);
547f2ec16ccSHideki Saito     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
548f2ec16ccSHideki Saito   }
549f2ec16ccSHideki Saito 
550d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
551f2ec16ccSHideki Saito }
552f2ec16ccSHideki Saito 
553ea7f3035SHideki Saito bool LoopVectorizationLegality::setupOuterLoopInductions() {
554ea7f3035SHideki Saito   BasicBlock *Header = TheLoop->getHeader();
555ea7f3035SHideki Saito 
556ea7f3035SHideki Saito   // Returns true if a given Phi is a supported induction.
557ea7f3035SHideki Saito   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
558ea7f3035SHideki Saito     InductionDescriptor ID;
559ea7f3035SHideki Saito     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
560ea7f3035SHideki Saito         ID.getKind() == InductionDescriptor::IK_IntInduction) {
561ea7f3035SHideki Saito       addInductionPhi(&Phi, ID, AllowedExit);
562ea7f3035SHideki Saito       return true;
563ea7f3035SHideki Saito     } else {
564ea7f3035SHideki Saito       // Bail out for any Phi in the outer loop header that is not a supported
565ea7f3035SHideki Saito       // induction.
566ea7f3035SHideki Saito       LLVM_DEBUG(
567ea7f3035SHideki Saito           dbgs()
568ea7f3035SHideki Saito           << "LV: Found unsupported PHI for outer loop vectorization.\n");
569ea7f3035SHideki Saito       return false;
570ea7f3035SHideki Saito     }
571ea7f3035SHideki Saito   };
572ea7f3035SHideki Saito 
573ea7f3035SHideki Saito   if (llvm::all_of(Header->phis(), isSupportedPhi))
574ea7f3035SHideki Saito     return true;
575ea7f3035SHideki Saito   else
576ea7f3035SHideki Saito     return false;
577ea7f3035SHideki Saito }
578ea7f3035SHideki Saito 
57966c120f0SFrancesco Petrogalli /// Checks if a function is scalarizable according to the TLI, in
58066c120f0SFrancesco Petrogalli /// the sense that it should be vectorized and then expanded in
58166c120f0SFrancesco Petrogalli /// multiple scalarcalls. This is represented in the
58266c120f0SFrancesco Petrogalli /// TLI via mappings that do not specify a vector name, as in the
58366c120f0SFrancesco Petrogalli /// following example:
58466c120f0SFrancesco Petrogalli ///
58566c120f0SFrancesco Petrogalli ///    const VecDesc VecIntrinsics[] = {
58666c120f0SFrancesco Petrogalli ///      {"llvm.phx.abs.i32", "", 4}
58766c120f0SFrancesco Petrogalli ///    };
58866c120f0SFrancesco Petrogalli static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
58966c120f0SFrancesco Petrogalli   const StringRef ScalarName = CI.getCalledFunction()->getName();
59066c120f0SFrancesco Petrogalli   bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
59166c120f0SFrancesco Petrogalli   // Check that all known VFs are not associated to a vector
59266c120f0SFrancesco Petrogalli   // function, i.e. the vector name is emty.
59366c120f0SFrancesco Petrogalli   if (Scalarize)
59466c120f0SFrancesco Petrogalli     for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName);
59566c120f0SFrancesco Petrogalli          VF <= WidestVF; VF *= 2) {
59666c120f0SFrancesco Petrogalli       Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
59766c120f0SFrancesco Petrogalli     }
59866c120f0SFrancesco Petrogalli   return Scalarize;
59966c120f0SFrancesco Petrogalli }
60066c120f0SFrancesco Petrogalli 
601f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeInstrs() {
602f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
603f2ec16ccSHideki Saito 
604f2ec16ccSHideki Saito   // Look for the attribute signaling the absence of NaNs.
605f2ec16ccSHideki Saito   Function &F = *Header->getParent();
606f2ec16ccSHideki Saito   HasFunNoNaNAttr =
607f2ec16ccSHideki Saito       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
608f2ec16ccSHideki Saito 
609f2ec16ccSHideki Saito   // For each block in the loop.
610f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
611f2ec16ccSHideki Saito     // Scan the instructions in the block and look for hazards.
612f2ec16ccSHideki Saito     for (Instruction &I : *BB) {
613f2ec16ccSHideki Saito       if (auto *Phi = dyn_cast<PHINode>(&I)) {
614f2ec16ccSHideki Saito         Type *PhiTy = Phi->getType();
615f2ec16ccSHideki Saito         // Check that this PHI type is allowed.
616f2ec16ccSHideki Saito         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
617f2ec16ccSHideki Saito             !PhiTy->isPointerTy()) {
6189e97caf5SRenato Golin           reportVectorizationFailure("Found a non-int non-pointer PHI",
6199e97caf5SRenato Golin                                      "loop control flow is not understood by vectorizer",
620ec818d7fSHideki Saito                                      "CFGNotUnderstood", ORE, TheLoop);
621f2ec16ccSHideki Saito           return false;
622f2ec16ccSHideki Saito         }
623f2ec16ccSHideki Saito 
624f2ec16ccSHideki Saito         // If this PHINode is not in the header block, then we know that we
625f2ec16ccSHideki Saito         // can convert it to select during if-conversion. No need to check if
626f2ec16ccSHideki Saito         // the PHIs in this block are induction or reduction variables.
627f2ec16ccSHideki Saito         if (BB != Header) {
62860a1e4ddSAnna Thomas           // Non-header phi nodes that have outside uses can be vectorized. Add
62960a1e4ddSAnna Thomas           // them to the list of allowed exits.
63060a1e4ddSAnna Thomas           // Unsafe cyclic dependencies with header phis are identified during
63160a1e4ddSAnna Thomas           // legalization for reduction, induction and first order
63260a1e4ddSAnna Thomas           // recurrences.
633dd18ce45SBjorn Pettersson           AllowedExit.insert(&I);
634f2ec16ccSHideki Saito           continue;
635f2ec16ccSHideki Saito         }
636f2ec16ccSHideki Saito 
637f2ec16ccSHideki Saito         // We only allow if-converted PHIs with exactly two incoming values.
638f2ec16ccSHideki Saito         if (Phi->getNumIncomingValues() != 2) {
6399e97caf5SRenato Golin           reportVectorizationFailure("Found an invalid PHI",
6409e97caf5SRenato Golin               "loop control flow is not understood by vectorizer",
641ec818d7fSHideki Saito               "CFGNotUnderstood", ORE, TheLoop, Phi);
642f2ec16ccSHideki Saito           return false;
643f2ec16ccSHideki Saito         }
644f2ec16ccSHideki Saito 
645f2ec16ccSHideki Saito         RecurrenceDescriptor RedDes;
646f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
647f2ec16ccSHideki Saito                                                  DT)) {
648f2ec16ccSHideki Saito           if (RedDes.hasUnsafeAlgebra())
649f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
650f2ec16ccSHideki Saito           AllowedExit.insert(RedDes.getLoopExitInstr());
651f2ec16ccSHideki Saito           Reductions[Phi] = RedDes;
652f2ec16ccSHideki Saito           continue;
653f2ec16ccSHideki Saito         }
654f2ec16ccSHideki Saito 
655b02b0ad8SAnna Thomas         // TODO: Instead of recording the AllowedExit, it would be good to record the
656b02b0ad8SAnna Thomas         // complementary set: NotAllowedExit. These include (but may not be
657b02b0ad8SAnna Thomas         // limited to):
658b02b0ad8SAnna Thomas         // 1. Reduction phis as they represent the one-before-last value, which
659b02b0ad8SAnna Thomas         // is not available when vectorized
660b02b0ad8SAnna Thomas         // 2. Induction phis and increment when SCEV predicates cannot be used
661b02b0ad8SAnna Thomas         // outside the loop - see addInductionPhi
662b02b0ad8SAnna Thomas         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
663b02b0ad8SAnna Thomas         // outside the loop - see call to hasOutsideLoopUser in the non-phi
664b02b0ad8SAnna Thomas         // handling below
665b02b0ad8SAnna Thomas         // 4. FirstOrderRecurrence phis that can possibly be handled by
666b02b0ad8SAnna Thomas         // extraction.
667b02b0ad8SAnna Thomas         // By recording these, we can then reason about ways to vectorize each
668b02b0ad8SAnna Thomas         // of these NotAllowedExit.
669f2ec16ccSHideki Saito         InductionDescriptor ID;
670f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
671f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
672f2ec16ccSHideki Saito           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
673f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
674f2ec16ccSHideki Saito           continue;
675f2ec16ccSHideki Saito         }
676f2ec16ccSHideki Saito 
677f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
678f2ec16ccSHideki Saito                                                          SinkAfter, DT)) {
6798e0c5f72SAyal Zaks           AllowedExit.insert(Phi);
680f2ec16ccSHideki Saito           FirstOrderRecurrences.insert(Phi);
681f2ec16ccSHideki Saito           continue;
682f2ec16ccSHideki Saito         }
683f2ec16ccSHideki Saito 
684f2ec16ccSHideki Saito         // As a last resort, coerce the PHI to a AddRec expression
685f2ec16ccSHideki Saito         // and re-try classifying it a an induction PHI.
686f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
687f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
688f2ec16ccSHideki Saito           continue;
689f2ec16ccSHideki Saito         }
690f2ec16ccSHideki Saito 
6919e97caf5SRenato Golin         reportVectorizationFailure("Found an unidentified PHI",
6929e97caf5SRenato Golin             "value that could not be identified as "
6939e97caf5SRenato Golin             "reduction is used outside the loop",
694ec818d7fSHideki Saito             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
695f2ec16ccSHideki Saito         return false;
696f2ec16ccSHideki Saito       } // end of PHI handling
697f2ec16ccSHideki Saito 
698f2ec16ccSHideki Saito       // We handle calls that:
699f2ec16ccSHideki Saito       //   * Are debug info intrinsics.
700f2ec16ccSHideki Saito       //   * Have a mapping to an IR intrinsic.
701f2ec16ccSHideki Saito       //   * Have a vector version available.
702f2ec16ccSHideki Saito       auto *CI = dyn_cast<CallInst>(&I);
70366c120f0SFrancesco Petrogalli 
704f2ec16ccSHideki Saito       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
705f2ec16ccSHideki Saito           !isa<DbgInfoIntrinsic>(CI) &&
706f2ec16ccSHideki Saito           !(CI->getCalledFunction() && TLI &&
70766c120f0SFrancesco Petrogalli             (!VFDatabase::getMappings(*CI).empty() ||
70866c120f0SFrancesco Petrogalli              isTLIScalarize(*TLI, *CI)))) {
7097d65fe5cSSanjay Patel         // If the call is a recognized math libary call, it is likely that
7107d65fe5cSSanjay Patel         // we can vectorize it given loosened floating-point constraints.
7117d65fe5cSSanjay Patel         LibFunc Func;
7127d65fe5cSSanjay Patel         bool IsMathLibCall =
7137d65fe5cSSanjay Patel             TLI && CI->getCalledFunction() &&
7147d65fe5cSSanjay Patel             CI->getType()->isFloatingPointTy() &&
7157d65fe5cSSanjay Patel             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
7167d65fe5cSSanjay Patel             TLI->hasOptimizedCodeGen(Func);
7177d65fe5cSSanjay Patel 
7187d65fe5cSSanjay Patel         if (IsMathLibCall) {
7197d65fe5cSSanjay Patel           // TODO: Ideally, we should not use clang-specific language here,
7207d65fe5cSSanjay Patel           // but it's hard to provide meaningful yet generic advice.
7217d65fe5cSSanjay Patel           // Also, should this be guarded by allowExtraAnalysis() and/or be part
7227d65fe5cSSanjay Patel           // of the returned info from isFunctionVectorizable()?
72366c120f0SFrancesco Petrogalli           reportVectorizationFailure(
72466c120f0SFrancesco Petrogalli               "Found a non-intrinsic callsite",
7259e97caf5SRenato Golin               "library call cannot be vectorized. "
7267d65fe5cSSanjay Patel               "Try compiling with -fno-math-errno, -ffast-math, "
7279e97caf5SRenato Golin               "or similar flags",
728ec818d7fSHideki Saito               "CantVectorizeLibcall", ORE, TheLoop, CI);
7297d65fe5cSSanjay Patel         } else {
7309e97caf5SRenato Golin           reportVectorizationFailure("Found a non-intrinsic callsite",
7319e97caf5SRenato Golin                                      "call instruction cannot be vectorized",
732ec818d7fSHideki Saito                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
7337d65fe5cSSanjay Patel         }
734f2ec16ccSHideki Saito         return false;
735f2ec16ccSHideki Saito       }
736f2ec16ccSHideki Saito 
737a066f1f9SSimon Pilgrim       // Some intrinsics have scalar arguments and should be same in order for
738a066f1f9SSimon Pilgrim       // them to be vectorized (i.e. loop invariant).
739a066f1f9SSimon Pilgrim       if (CI) {
740f2ec16ccSHideki Saito         auto *SE = PSE.getSE();
741a066f1f9SSimon Pilgrim         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
742a066f1f9SSimon Pilgrim         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
743a066f1f9SSimon Pilgrim           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
744a066f1f9SSimon Pilgrim             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
7459e97caf5SRenato Golin               reportVectorizationFailure("Found unvectorizable intrinsic",
7469e97caf5SRenato Golin                   "intrinsic instruction cannot be vectorized",
747ec818d7fSHideki Saito                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
748f2ec16ccSHideki Saito               return false;
749f2ec16ccSHideki Saito             }
750f2ec16ccSHideki Saito           }
751a066f1f9SSimon Pilgrim       }
752f2ec16ccSHideki Saito 
753f2ec16ccSHideki Saito       // Check that the instruction return type is vectorizable.
754f2ec16ccSHideki Saito       // Also, we can't vectorize extractelement instructions.
755f2ec16ccSHideki Saito       if ((!VectorType::isValidElementType(I.getType()) &&
756f2ec16ccSHideki Saito            !I.getType()->isVoidTy()) ||
757f2ec16ccSHideki Saito           isa<ExtractElementInst>(I)) {
7589e97caf5SRenato Golin         reportVectorizationFailure("Found unvectorizable type",
7599e97caf5SRenato Golin             "instruction return type cannot be vectorized",
760ec818d7fSHideki Saito             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
761f2ec16ccSHideki Saito         return false;
762f2ec16ccSHideki Saito       }
763f2ec16ccSHideki Saito 
764f2ec16ccSHideki Saito       // Check that the stored type is vectorizable.
765f2ec16ccSHideki Saito       if (auto *ST = dyn_cast<StoreInst>(&I)) {
766f2ec16ccSHideki Saito         Type *T = ST->getValueOperand()->getType();
767f2ec16ccSHideki Saito         if (!VectorType::isValidElementType(T)) {
7689e97caf5SRenato Golin           reportVectorizationFailure("Store instruction cannot be vectorized",
7699e97caf5SRenato Golin                                      "store instruction cannot be vectorized",
770ec818d7fSHideki Saito                                      "CantVectorizeStore", ORE, TheLoop, ST);
771f2ec16ccSHideki Saito           return false;
772f2ec16ccSHideki Saito         }
773f2ec16ccSHideki Saito 
7746452bdd2SWarren Ristow         // For nontemporal stores, check that a nontemporal vector version is
7756452bdd2SWarren Ristow         // supported on the target.
7766452bdd2SWarren Ristow         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
7776452bdd2SWarren Ristow           // Arbitrarily try a vector of 2 elements.
778d2befc66SChristopher Tetreault           auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2);
7796452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of stored type");
78052e98f62SNikita Popov           if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
7816452bdd2SWarren Ristow             reportVectorizationFailure(
7826452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
7836452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
784ec818d7fSHideki Saito                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
7856452bdd2SWarren Ristow             return false;
7866452bdd2SWarren Ristow           }
7876452bdd2SWarren Ristow         }
7886452bdd2SWarren Ristow 
7896452bdd2SWarren Ristow       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
7906452bdd2SWarren Ristow         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
7916452bdd2SWarren Ristow           // For nontemporal loads, check that a nontemporal vector version is
7926452bdd2SWarren Ristow           // supported on the target (arbitrarily try a vector of 2 elements).
793d2befc66SChristopher Tetreault           auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2);
7946452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of load type");
79552e98f62SNikita Popov           if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
7966452bdd2SWarren Ristow             reportVectorizationFailure(
7976452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
7986452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
799ec818d7fSHideki Saito                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
8006452bdd2SWarren Ristow             return false;
8016452bdd2SWarren Ristow           }
8026452bdd2SWarren Ristow         }
8036452bdd2SWarren Ristow 
804f2ec16ccSHideki Saito         // FP instructions can allow unsafe algebra, thus vectorizable by
805f2ec16ccSHideki Saito         // non-IEEE-754 compliant SIMD units.
806f2ec16ccSHideki Saito         // This applies to floating-point math operations and calls, not memory
807f2ec16ccSHideki Saito         // operations, shuffles, or casts, as they don't change precision or
808f2ec16ccSHideki Saito         // semantics.
809f2ec16ccSHideki Saito       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
810f2ec16ccSHideki Saito                  !I.isFast()) {
811d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
812f2ec16ccSHideki Saito         Hints->setPotentiallyUnsafe();
813f2ec16ccSHideki Saito       }
814f2ec16ccSHideki Saito 
815f2ec16ccSHideki Saito       // Reduction instructions are allowed to have exit users.
816f2ec16ccSHideki Saito       // All other instructions must not have external users.
817f2ec16ccSHideki Saito       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
818b02b0ad8SAnna Thomas         // We can safely vectorize loops where instructions within the loop are
819b02b0ad8SAnna Thomas         // used outside the loop only if the SCEV predicates within the loop is
820b02b0ad8SAnna Thomas         // same as outside the loop. Allowing the exit means reusing the SCEV
821b02b0ad8SAnna Thomas         // outside the loop.
822b02b0ad8SAnna Thomas         if (PSE.getUnionPredicate().isAlwaysTrue()) {
823b02b0ad8SAnna Thomas           AllowedExit.insert(&I);
824b02b0ad8SAnna Thomas           continue;
825b02b0ad8SAnna Thomas         }
8269e97caf5SRenato Golin         reportVectorizationFailure("Value cannot be used outside the loop",
8279e97caf5SRenato Golin                                    "value cannot be used outside the loop",
828ec818d7fSHideki Saito                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
829f2ec16ccSHideki Saito         return false;
830f2ec16ccSHideki Saito       }
831f2ec16ccSHideki Saito     } // next instr.
832f2ec16ccSHideki Saito   }
833f2ec16ccSHideki Saito 
834f2ec16ccSHideki Saito   if (!PrimaryInduction) {
835f2ec16ccSHideki Saito     if (Inductions.empty()) {
8369e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8379e97caf5SRenato Golin           "loop induction variable could not be identified",
838ec818d7fSHideki Saito           "NoInductionVariable", ORE, TheLoop);
839f2ec16ccSHideki Saito       return false;
8404f27730eSWarren Ristow     } else if (!WidestIndTy) {
8419e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8429e97caf5SRenato Golin           "integer loop induction variable could not be identified",
843ec818d7fSHideki Saito           "NoIntegerInductionVariable", ORE, TheLoop);
8444f27730eSWarren Ristow       return false;
8459e97caf5SRenato Golin     } else {
8469e97caf5SRenato Golin       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
847f2ec16ccSHideki Saito     }
848f2ec16ccSHideki Saito   }
849f2ec16ccSHideki Saito 
8509d24933fSFlorian Hahn   // For first order recurrences, we use the previous value (incoming value from
8519d24933fSFlorian Hahn   // the latch) to check if it dominates all users of the recurrence. Bail out
8529d24933fSFlorian Hahn   // if we have to sink such an instruction for another recurrence, as the
8539d24933fSFlorian Hahn   // dominance requirement may not hold after sinking.
8549d24933fSFlorian Hahn   BasicBlock *LoopLatch = TheLoop->getLoopLatch();
8559d24933fSFlorian Hahn   if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
8569d24933fSFlorian Hahn         Instruction *V =
8579d24933fSFlorian Hahn             cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
8589d24933fSFlorian Hahn         return SinkAfter.find(V) != SinkAfter.end();
8599d24933fSFlorian Hahn       }))
8609d24933fSFlorian Hahn     return false;
8619d24933fSFlorian Hahn 
862f2ec16ccSHideki Saito   // Now we know the widest induction type, check if our found induction
863f2ec16ccSHideki Saito   // is the same size. If it's not, unset it here and InnerLoopVectorizer
864f2ec16ccSHideki Saito   // will create another.
865f2ec16ccSHideki Saito   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
866f2ec16ccSHideki Saito     PrimaryInduction = nullptr;
867f2ec16ccSHideki Saito 
868f2ec16ccSHideki Saito   return true;
869f2ec16ccSHideki Saito }
870f2ec16ccSHideki Saito 
871f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeMemory() {
872f2ec16ccSHideki Saito   LAI = &(*GetLAA)(*TheLoop);
873f2ec16ccSHideki Saito   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
874f2ec16ccSHideki Saito   if (LAR) {
875f2ec16ccSHideki Saito     ORE->emit([&]() {
876f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
877f2ec16ccSHideki Saito                                         "loop not vectorized: ", *LAR);
878f2ec16ccSHideki Saito     });
879f2ec16ccSHideki Saito   }
880f2ec16ccSHideki Saito   if (!LAI->canVectorizeMemory())
881f2ec16ccSHideki Saito     return false;
882f2ec16ccSHideki Saito 
8835e9215f0SAnna Thomas   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
8849e97caf5SRenato Golin     reportVectorizationFailure("Stores to a uniform address",
8859e97caf5SRenato Golin         "write to a loop invariant address could not be vectorized",
886ec818d7fSHideki Saito         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
887f2ec16ccSHideki Saito     return false;
888f2ec16ccSHideki Saito   }
889f2ec16ccSHideki Saito   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
890f2ec16ccSHideki Saito   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
891f2ec16ccSHideki Saito 
892f2ec16ccSHideki Saito   return true;
893f2ec16ccSHideki Saito }
894f2ec16ccSHideki Saito 
895f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
896f2ec16ccSHideki Saito   Value *In0 = const_cast<Value *>(V);
897f2ec16ccSHideki Saito   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
898f2ec16ccSHideki Saito   if (!PN)
899f2ec16ccSHideki Saito     return false;
900f2ec16ccSHideki Saito 
901f2ec16ccSHideki Saito   return Inductions.count(PN);
902f2ec16ccSHideki Saito }
903f2ec16ccSHideki Saito 
904f2ec16ccSHideki Saito bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
905f2ec16ccSHideki Saito   auto *Inst = dyn_cast<Instruction>(V);
906f2ec16ccSHideki Saito   return (Inst && InductionCastsToIgnore.count(Inst));
907f2ec16ccSHideki Saito }
908f2ec16ccSHideki Saito 
909f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
910f2ec16ccSHideki Saito   return isInductionPhi(V) || isCastedInductionVariable(V);
911f2ec16ccSHideki Saito }
912f2ec16ccSHideki Saito 
913f2ec16ccSHideki Saito bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
914f2ec16ccSHideki Saito   return FirstOrderRecurrences.count(Phi);
915f2ec16ccSHideki Saito }
916f2ec16ccSHideki Saito 
917f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
918f2ec16ccSHideki Saito   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
919f2ec16ccSHideki Saito }
920f2ec16ccSHideki Saito 
921f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockCanBePredicated(
922d57d73daSDorit Nuzman     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) {
923f2ec16ccSHideki Saito   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
924f2ec16ccSHideki Saito 
925f2ec16ccSHideki Saito   for (Instruction &I : *BB) {
926f2ec16ccSHideki Saito     // Check that we don't have a constant expression that can trap as operand.
927f2ec16ccSHideki Saito     for (Value *Operand : I.operands()) {
928f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(Operand))
929f2ec16ccSHideki Saito         if (C->canTrap())
930f2ec16ccSHideki Saito           return false;
931f2ec16ccSHideki Saito     }
93223c11380SFlorian Hahn 
93323c11380SFlorian Hahn     // We can predicate blocks with calls to assume, as long as we drop them in
93423c11380SFlorian Hahn     // case we flatten the CFG via predication.
93523c11380SFlorian Hahn     if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
93623c11380SFlorian Hahn       ConditionalAssumes.insert(&I);
93723c11380SFlorian Hahn       continue;
93823c11380SFlorian Hahn     }
93923c11380SFlorian Hahn 
940f2ec16ccSHideki Saito     // We might be able to hoist the load.
941f2ec16ccSHideki Saito     if (I.mayReadFromMemory()) {
942f2ec16ccSHideki Saito       auto *LI = dyn_cast<LoadInst>(&I);
943f2ec16ccSHideki Saito       if (!LI)
944f2ec16ccSHideki Saito         return false;
945f2ec16ccSHideki Saito       if (!SafePtrs.count(LI->getPointerOperand())) {
946f2ec16ccSHideki Saito         // !llvm.mem.parallel_loop_access implies if-conversion safety.
947f2ec16ccSHideki Saito         // Otherwise, record that the load needs (real or emulated) masking
948f2ec16ccSHideki Saito         // and let the cost model decide.
949d57d73daSDorit Nuzman         if (!IsAnnotatedParallel || PreserveGuards)
950f2ec16ccSHideki Saito           MaskedOp.insert(LI);
951f2ec16ccSHideki Saito         continue;
952f2ec16ccSHideki Saito       }
953f2ec16ccSHideki Saito     }
954f2ec16ccSHideki Saito 
955f2ec16ccSHideki Saito     if (I.mayWriteToMemory()) {
956f2ec16ccSHideki Saito       auto *SI = dyn_cast<StoreInst>(&I);
957f2ec16ccSHideki Saito       if (!SI)
958f2ec16ccSHideki Saito         return false;
959f2ec16ccSHideki Saito       // Predicated store requires some form of masking:
960f2ec16ccSHideki Saito       // 1) masked store HW instruction,
961f2ec16ccSHideki Saito       // 2) emulation via load-blend-store (only if safe and legal to do so,
962f2ec16ccSHideki Saito       //    be aware on the race conditions), or
963f2ec16ccSHideki Saito       // 3) element-by-element predicate check and scalar store.
964f2ec16ccSHideki Saito       MaskedOp.insert(SI);
965f2ec16ccSHideki Saito       continue;
966f2ec16ccSHideki Saito     }
967f2ec16ccSHideki Saito     if (I.mayThrow())
968f2ec16ccSHideki Saito       return false;
969f2ec16ccSHideki Saito   }
970f2ec16ccSHideki Saito 
971f2ec16ccSHideki Saito   return true;
972f2ec16ccSHideki Saito }
973f2ec16ccSHideki Saito 
974f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
975f2ec16ccSHideki Saito   if (!EnableIfConversion) {
9769e97caf5SRenato Golin     reportVectorizationFailure("If-conversion is disabled",
9779e97caf5SRenato Golin                                "if-conversion is disabled",
978ec818d7fSHideki Saito                                "IfConversionDisabled",
979ec818d7fSHideki Saito                                ORE, TheLoop);
980f2ec16ccSHideki Saito     return false;
981f2ec16ccSHideki Saito   }
982f2ec16ccSHideki Saito 
983f2ec16ccSHideki Saito   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
984f2ec16ccSHideki Saito 
985cf3b5559SPhilip Reames   // A list of pointers which are known to be dereferenceable within scope of
986cf3b5559SPhilip Reames   // the loop body for each iteration of the loop which executes.  That is,
987cf3b5559SPhilip Reames   // the memory pointed to can be dereferenced (with the access size implied by
988cf3b5559SPhilip Reames   // the value's type) unconditionally within the loop header without
989cf3b5559SPhilip Reames   // introducing a new fault.
9903bbc71d6SSjoerd Meijer   SmallPtrSet<Value *, 8> SafePointers;
991f2ec16ccSHideki Saito 
992f2ec16ccSHideki Saito   // Collect safe addresses.
993f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
9947403569bSPhilip Reames     if (!blockNeedsPredication(BB)) {
995f2ec16ccSHideki Saito       for (Instruction &I : *BB)
996f2ec16ccSHideki Saito         if (auto *Ptr = getLoadStorePointerOperand(&I))
9973bbc71d6SSjoerd Meijer           SafePointers.insert(Ptr);
9987403569bSPhilip Reames       continue;
9997403569bSPhilip Reames     }
10007403569bSPhilip Reames 
10017403569bSPhilip Reames     // For a block which requires predication, a address may be safe to access
10027403569bSPhilip Reames     // in the loop w/o predication if we can prove dereferenceability facts
10037403569bSPhilip Reames     // sufficient to ensure it'll never fault within the loop. For the moment,
10047403569bSPhilip Reames     // we restrict this to loads; stores are more complicated due to
10057403569bSPhilip Reames     // concurrency restrictions.
10067403569bSPhilip Reames     ScalarEvolution &SE = *PSE.getSE();
10077403569bSPhilip Reames     for (Instruction &I : *BB) {
10087403569bSPhilip Reames       LoadInst *LI = dyn_cast<LoadInst>(&I);
10097403569bSPhilip Reames       if (LI && !mustSuppressSpeculation(*LI) &&
10107403569bSPhilip Reames           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
10113bbc71d6SSjoerd Meijer         SafePointers.insert(LI->getPointerOperand());
10127403569bSPhilip Reames     }
1013f2ec16ccSHideki Saito   }
1014f2ec16ccSHideki Saito 
1015f2ec16ccSHideki Saito   // Collect the blocks that need predication.
1016f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
1017f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
1018f2ec16ccSHideki Saito     // We don't support switch statements inside loops.
1019f2ec16ccSHideki Saito     if (!isa<BranchInst>(BB->getTerminator())) {
10209e97caf5SRenato Golin       reportVectorizationFailure("Loop contains a switch statement",
10219e97caf5SRenato Golin                                  "loop contains a switch statement",
1022ec818d7fSHideki Saito                                  "LoopContainsSwitch", ORE, TheLoop,
1023ec818d7fSHideki Saito                                  BB->getTerminator());
1024f2ec16ccSHideki Saito       return false;
1025f2ec16ccSHideki Saito     }
1026f2ec16ccSHideki Saito 
1027f2ec16ccSHideki Saito     // We must be able to predicate all blocks that need to be predicated.
1028f2ec16ccSHideki Saito     if (blockNeedsPredication(BB)) {
10293bbc71d6SSjoerd Meijer       if (!blockCanBePredicated(BB, SafePointers)) {
10309e97caf5SRenato Golin         reportVectorizationFailure(
10319e97caf5SRenato Golin             "Control flow cannot be substituted for a select",
10329e97caf5SRenato Golin             "control flow cannot be substituted for a select",
1033ec818d7fSHideki Saito             "NoCFGForSelect", ORE, TheLoop,
1034ec818d7fSHideki Saito             BB->getTerminator());
1035f2ec16ccSHideki Saito         return false;
1036f2ec16ccSHideki Saito       }
1037f2ec16ccSHideki Saito     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
10389e97caf5SRenato Golin       reportVectorizationFailure(
10399e97caf5SRenato Golin           "Control flow cannot be substituted for a select",
10409e97caf5SRenato Golin           "control flow cannot be substituted for a select",
1041ec818d7fSHideki Saito           "NoCFGForSelect", ORE, TheLoop,
1042ec818d7fSHideki Saito           BB->getTerminator());
1043f2ec16ccSHideki Saito       return false;
1044f2ec16ccSHideki Saito     }
1045f2ec16ccSHideki Saito   }
1046f2ec16ccSHideki Saito 
1047f2ec16ccSHideki Saito   // We can if-convert this loop.
1048f2ec16ccSHideki Saito   return true;
1049f2ec16ccSHideki Saito }
1050f2ec16ccSHideki Saito 
1051f2ec16ccSHideki Saito // Helper function to canVectorizeLoopNestCFG.
1052f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1053f2ec16ccSHideki Saito                                                     bool UseVPlanNativePath) {
1054f2ec16ccSHideki Saito   assert((UseVPlanNativePath || Lp->empty()) &&
1055f2ec16ccSHideki Saito          "VPlan-native path is not enabled.");
1056f2ec16ccSHideki Saito 
1057f2ec16ccSHideki Saito   // TODO: ORE should be improved to show more accurate information when an
1058f2ec16ccSHideki Saito   // outer loop can't be vectorized because a nested loop is not understood or
1059f2ec16ccSHideki Saito   // legal. Something like: "outer_loop_location: loop not vectorized:
1060f2ec16ccSHideki Saito   // (inner_loop_location) loop control flow is not understood by vectorizer".
1061f2ec16ccSHideki Saito 
1062f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1063f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1064f2ec16ccSHideki Saito   bool Result = true;
1065f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1066f2ec16ccSHideki Saito 
1067f2ec16ccSHideki Saito   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1068f2ec16ccSHideki Saito   // be canonicalized.
1069f2ec16ccSHideki Saito   if (!Lp->getLoopPreheader()) {
10709e97caf5SRenato Golin     reportVectorizationFailure("Loop doesn't have a legal pre-header",
10719e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1072ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1073f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1074f2ec16ccSHideki Saito       Result = false;
1075f2ec16ccSHideki Saito     else
1076f2ec16ccSHideki Saito       return false;
1077f2ec16ccSHideki Saito   }
1078f2ec16ccSHideki Saito 
1079f2ec16ccSHideki Saito   // We must have a single backedge.
1080f2ec16ccSHideki Saito   if (Lp->getNumBackEdges() != 1) {
10819e97caf5SRenato Golin     reportVectorizationFailure("The loop must have a single backedge",
10829e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1083ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1084f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1085f2ec16ccSHideki Saito       Result = false;
1086f2ec16ccSHideki Saito     else
1087f2ec16ccSHideki Saito       return false;
1088f2ec16ccSHideki Saito   }
1089f2ec16ccSHideki Saito 
1090f2ec16ccSHideki Saito   // We must have a single exiting block.
1091f2ec16ccSHideki Saito   if (!Lp->getExitingBlock()) {
10929e97caf5SRenato Golin     reportVectorizationFailure("The loop must have an exiting block",
10939e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1094ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1095f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1096f2ec16ccSHideki Saito       Result = false;
1097f2ec16ccSHideki Saito     else
1098f2ec16ccSHideki Saito       return false;
1099f2ec16ccSHideki Saito   }
1100f2ec16ccSHideki Saito 
1101f2ec16ccSHideki Saito   // We only handle bottom-tested loops, i.e. loop in which the condition is
1102f2ec16ccSHideki Saito   // checked at the end of each iteration. With that we can assume that all
1103f2ec16ccSHideki Saito   // instructions in the loop are executed the same number of times.
1104f2ec16ccSHideki Saito   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
11059e97caf5SRenato Golin     reportVectorizationFailure("The exiting block is not the loop latch",
11069e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
1107ec818d7fSHideki Saito         "CFGNotUnderstood", ORE, TheLoop);
1108f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1109f2ec16ccSHideki Saito       Result = false;
1110f2ec16ccSHideki Saito     else
1111f2ec16ccSHideki Saito       return false;
1112f2ec16ccSHideki Saito   }
1113f2ec16ccSHideki Saito 
1114f2ec16ccSHideki Saito   return Result;
1115f2ec16ccSHideki Saito }
1116f2ec16ccSHideki Saito 
1117f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1118f2ec16ccSHideki Saito     Loop *Lp, bool UseVPlanNativePath) {
1119f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1120f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1121f2ec16ccSHideki Saito   bool Result = true;
1122f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1123f2ec16ccSHideki Saito   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1124f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1125f2ec16ccSHideki Saito       Result = false;
1126f2ec16ccSHideki Saito     else
1127f2ec16ccSHideki Saito       return false;
1128f2ec16ccSHideki Saito   }
1129f2ec16ccSHideki Saito 
1130f2ec16ccSHideki Saito   // Recursively check whether the loop control flow of nested loops is
1131f2ec16ccSHideki Saito   // understood.
1132f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
1133f2ec16ccSHideki Saito     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1134f2ec16ccSHideki Saito       if (DoExtraAnalysis)
1135f2ec16ccSHideki Saito         Result = false;
1136f2ec16ccSHideki Saito       else
1137f2ec16ccSHideki Saito         return false;
1138f2ec16ccSHideki Saito     }
1139f2ec16ccSHideki Saito 
1140f2ec16ccSHideki Saito   return Result;
1141f2ec16ccSHideki Saito }
1142f2ec16ccSHideki Saito 
1143f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1144f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1145f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1146f2ec16ccSHideki Saito   bool Result = true;
1147f2ec16ccSHideki Saito 
1148f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1149f2ec16ccSHideki Saito   // Check whether the loop-related control flow in the loop nest is expected by
1150f2ec16ccSHideki Saito   // vectorizer.
1151f2ec16ccSHideki Saito   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1152f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1153f2ec16ccSHideki Saito       Result = false;
1154f2ec16ccSHideki Saito     else
1155f2ec16ccSHideki Saito       return false;
1156f2ec16ccSHideki Saito   }
1157f2ec16ccSHideki Saito 
1158f2ec16ccSHideki Saito   // We need to have a loop header.
1159d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1160f2ec16ccSHideki Saito                     << '\n');
1161f2ec16ccSHideki Saito 
1162f2ec16ccSHideki Saito   // Specific checks for outer loops. We skip the remaining legal checks at this
1163f2ec16ccSHideki Saito   // point because they don't support outer loops.
1164f2ec16ccSHideki Saito   if (!TheLoop->empty()) {
1165f2ec16ccSHideki Saito     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1166f2ec16ccSHideki Saito 
1167f2ec16ccSHideki Saito     if (!canVectorizeOuterLoop()) {
11689e97caf5SRenato Golin       reportVectorizationFailure("Unsupported outer loop",
11699e97caf5SRenato Golin                                  "unsupported outer loop",
1170ec818d7fSHideki Saito                                  "UnsupportedOuterLoop",
1171ec818d7fSHideki Saito                                  ORE, TheLoop);
1172f2ec16ccSHideki Saito       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1173f2ec16ccSHideki Saito       // outer loops.
1174f2ec16ccSHideki Saito       return false;
1175f2ec16ccSHideki Saito     }
1176f2ec16ccSHideki Saito 
1177d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1178f2ec16ccSHideki Saito     return Result;
1179f2ec16ccSHideki Saito   }
1180f2ec16ccSHideki Saito 
1181f2ec16ccSHideki Saito   assert(TheLoop->empty() && "Inner loop expected.");
1182f2ec16ccSHideki Saito   // Check if we can if-convert non-single-bb loops.
1183f2ec16ccSHideki Saito   unsigned NumBlocks = TheLoop->getNumBlocks();
1184f2ec16ccSHideki Saito   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1185d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1186f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1187f2ec16ccSHideki Saito       Result = false;
1188f2ec16ccSHideki Saito     else
1189f2ec16ccSHideki Saito       return false;
1190f2ec16ccSHideki Saito   }
1191f2ec16ccSHideki Saito 
1192f2ec16ccSHideki Saito   // Check if we can vectorize the instructions and CFG in this loop.
1193f2ec16ccSHideki Saito   if (!canVectorizeInstrs()) {
1194d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1195f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1196f2ec16ccSHideki Saito       Result = false;
1197f2ec16ccSHideki Saito     else
1198f2ec16ccSHideki Saito       return false;
1199f2ec16ccSHideki Saito   }
1200f2ec16ccSHideki Saito 
1201f2ec16ccSHideki Saito   // Go over each instruction and look at memory deps.
1202f2ec16ccSHideki Saito   if (!canVectorizeMemory()) {
1203d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1204f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1205f2ec16ccSHideki Saito       Result = false;
1206f2ec16ccSHideki Saito     else
1207f2ec16ccSHideki Saito       return false;
1208f2ec16ccSHideki Saito   }
1209f2ec16ccSHideki Saito 
1210d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1211f2ec16ccSHideki Saito                     << (LAI->getRuntimePointerChecking()->Need
1212f2ec16ccSHideki Saito                             ? " (with a runtime bound check)"
1213f2ec16ccSHideki Saito                             : "")
1214f2ec16ccSHideki Saito                     << "!\n");
1215f2ec16ccSHideki Saito 
1216f2ec16ccSHideki Saito   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1217f2ec16ccSHideki Saito   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1218f2ec16ccSHideki Saito     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1219f2ec16ccSHideki Saito 
1220f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
12219e97caf5SRenato Golin     reportVectorizationFailure("Too many SCEV checks needed",
12229e97caf5SRenato Golin         "Too many SCEV assumptions need to be made and checked at runtime",
1223ec818d7fSHideki Saito         "TooManySCEVRunTimeChecks", ORE, TheLoop);
1224f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1225f2ec16ccSHideki Saito       Result = false;
1226f2ec16ccSHideki Saito     else
1227f2ec16ccSHideki Saito       return false;
1228f2ec16ccSHideki Saito   }
1229f2ec16ccSHideki Saito 
1230f2ec16ccSHideki Saito   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1231f2ec16ccSHideki Saito   // we can vectorize, and at this point we don't have any other mem analysis
1232f2ec16ccSHideki Saito   // which may limit our maximum vectorization factor, so just return true with
1233f2ec16ccSHideki Saito   // no restrictions.
1234f2ec16ccSHideki Saito   return Result;
1235f2ec16ccSHideki Saito }
1236f2ec16ccSHideki Saito 
1237d57d73daSDorit Nuzman bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
1238b0b5312eSAyal Zaks 
1239b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1240b0b5312eSAyal Zaks 
1241d15df0edSAyal Zaks   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
1242b0b5312eSAyal Zaks 
1243d0d38df0SDavid Green   for (auto &Reduction : getReductionVars())
1244d15df0edSAyal Zaks     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
1245d15df0edSAyal Zaks 
1246d15df0edSAyal Zaks   // TODO: handle non-reduction outside users when tail is folded by masking.
1247b0b5312eSAyal Zaks   for (auto *AE : AllowedExit) {
1248d15df0edSAyal Zaks     // Check that all users of allowed exit values are inside the loop or
1249d15df0edSAyal Zaks     // are the live-out of a reduction.
1250d15df0edSAyal Zaks     if (ReductionLiveOuts.count(AE))
1251d15df0edSAyal Zaks       continue;
1252b0b5312eSAyal Zaks     for (User *U : AE->users()) {
1253b0b5312eSAyal Zaks       Instruction *UI = cast<Instruction>(U);
1254b0b5312eSAyal Zaks       if (TheLoop->contains(UI))
1255b0b5312eSAyal Zaks         continue;
12569e97caf5SRenato Golin       reportVectorizationFailure(
12579e97caf5SRenato Golin           "Cannot fold tail by masking, loop has an outside user for",
12589e97caf5SRenato Golin           "Cannot fold tail by masking in the presence of live outs.",
1259ec818d7fSHideki Saito           "LiveOutFoldingTailByMasking", ORE, TheLoop, UI);
1260b0b5312eSAyal Zaks       return false;
1261b0b5312eSAyal Zaks     }
1262b0b5312eSAyal Zaks   }
1263b0b5312eSAyal Zaks 
1264b0b5312eSAyal Zaks   // The list of pointers that we can safely read and write to remains empty.
1265b0b5312eSAyal Zaks   SmallPtrSet<Value *, 8> SafePointers;
1266b0b5312eSAyal Zaks 
1267b0b5312eSAyal Zaks   // Check and mark all blocks for predication, including those that ordinarily
1268b0b5312eSAyal Zaks   // do not need predication such as the header block.
1269b0b5312eSAyal Zaks   for (BasicBlock *BB : TheLoop->blocks()) {
1270d57d73daSDorit Nuzman     if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) {
12719e97caf5SRenato Golin       reportVectorizationFailure(
12729e97caf5SRenato Golin           "Cannot fold tail by masking as required",
12739e97caf5SRenato Golin           "control flow cannot be substituted for a select",
1274ec818d7fSHideki Saito           "NoCFGForSelect", ORE, TheLoop,
1275ec818d7fSHideki Saito           BB->getTerminator());
1276b0b5312eSAyal Zaks       return false;
1277b0b5312eSAyal Zaks     }
1278b0b5312eSAyal Zaks   }
1279b0b5312eSAyal Zaks 
1280b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1281b0b5312eSAyal Zaks   return true;
1282b0b5312eSAyal Zaks }
1283b0b5312eSAyal Zaks 
1284f2ec16ccSHideki Saito } // namespace llvm
1285