1f2ec16ccSHideki Saito //===- LoopVectorizationLegality.cpp --------------------------------------===//
2f2ec16ccSHideki Saito //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6f2ec16ccSHideki Saito //
7f2ec16ccSHideki Saito //===----------------------------------------------------------------------===//
8f2ec16ccSHideki Saito //
9f2ec16ccSHideki Saito // This file provides loop vectorization legality analysis. Original code
10f2ec16ccSHideki Saito // resided in LoopVectorize.cpp for a long time.
11f2ec16ccSHideki Saito //
12f2ec16ccSHideki Saito // At this point, it is implemented as a utility class, not as an analysis
13f2ec16ccSHideki Saito // pass. It should be easy to create an analysis pass around it if there
14f2ec16ccSHideki Saito // is a need (but D45420 needs to happen first).
15f2ec16ccSHideki Saito //
16f2ec16ccSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
17f2ec16ccSHideki Saito #include "llvm/Analysis/VectorUtils.h"
18f2ec16ccSHideki Saito #include "llvm/IR/IntrinsicInst.h"
19f2ec16ccSHideki Saito 
20f2ec16ccSHideki Saito using namespace llvm;
21f2ec16ccSHideki Saito 
22f2ec16ccSHideki Saito #define LV_NAME "loop-vectorize"
23f2ec16ccSHideki Saito #define DEBUG_TYPE LV_NAME
24f2ec16ccSHideki Saito 
254e4ecae0SHideki Saito extern cl::opt<bool> EnableVPlanPredication;
264e4ecae0SHideki Saito 
27f2ec16ccSHideki Saito static cl::opt<bool>
28f2ec16ccSHideki Saito     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
29f2ec16ccSHideki Saito                        cl::desc("Enable if-conversion during vectorization."));
30f2ec16ccSHideki Saito 
31f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
32f2ec16ccSHideki Saito     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
33f2ec16ccSHideki Saito     cl::desc("The maximum allowed number of runtime memory checks with a "
34f2ec16ccSHideki Saito              "vectorize(enable) pragma."));
35f2ec16ccSHideki Saito 
36f2ec16ccSHideki Saito static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
37f2ec16ccSHideki Saito     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
38f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed."));
39f2ec16ccSHideki Saito 
40f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
41f2ec16ccSHideki Saito     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
42f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed with a "
43f2ec16ccSHideki Saito              "vectorize(enable) pragma"));
44f2ec16ccSHideki Saito 
45f2ec16ccSHideki Saito /// Maximum vectorization interleave count.
46f2ec16ccSHideki Saito static const unsigned MaxInterleaveFactor = 16;
47f2ec16ccSHideki Saito 
48f2ec16ccSHideki Saito namespace llvm {
49f2ec16ccSHideki Saito 
5019189993SFangrui Song #ifndef NDEBUG
519e97caf5SRenato Golin static void debugVectorizationFailure(const StringRef DebugMsg,
529e97caf5SRenato Golin     Instruction *I) {
539e97caf5SRenato Golin   dbgs() << "LV: Not vectorizing: " << DebugMsg;
549e97caf5SRenato Golin   if (I != nullptr)
559e97caf5SRenato Golin     dbgs() << " " << *I;
569e97caf5SRenato Golin   else
579e97caf5SRenato Golin     dbgs() << '.';
589e97caf5SRenato Golin   dbgs() << '\n';
599e97caf5SRenato Golin }
6019189993SFangrui Song #endif
619e97caf5SRenato Golin 
62f2ec16ccSHideki Saito OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
63f2ec16ccSHideki Saito                                                   StringRef RemarkName,
64f2ec16ccSHideki Saito                                                   Loop *TheLoop,
65f2ec16ccSHideki Saito                                                   Instruction *I) {
66f2ec16ccSHideki Saito   Value *CodeRegion = TheLoop->getHeader();
67f2ec16ccSHideki Saito   DebugLoc DL = TheLoop->getStartLoc();
68f2ec16ccSHideki Saito 
69f2ec16ccSHideki Saito   if (I) {
70f2ec16ccSHideki Saito     CodeRegion = I->getParent();
71f2ec16ccSHideki Saito     // If there is no debug location attached to the instruction, revert back to
72f2ec16ccSHideki Saito     // using the loop's.
73f2ec16ccSHideki Saito     if (I->getDebugLoc())
74f2ec16ccSHideki Saito       DL = I->getDebugLoc();
75f2ec16ccSHideki Saito   }
76f2ec16ccSHideki Saito 
77f2ec16ccSHideki Saito   OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
78f2ec16ccSHideki Saito   R << "loop not vectorized: ";
79f2ec16ccSHideki Saito   return R;
80f2ec16ccSHideki Saito }
81f2ec16ccSHideki Saito 
82f2ec16ccSHideki Saito bool LoopVectorizeHints::Hint::validate(unsigned Val) {
83f2ec16ccSHideki Saito   switch (Kind) {
84f2ec16ccSHideki Saito   case HK_WIDTH:
85f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
86f2ec16ccSHideki Saito   case HK_UNROLL:
87f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
88f2ec16ccSHideki Saito   case HK_FORCE:
89f2ec16ccSHideki Saito     return (Val <= 1);
90f2ec16ccSHideki Saito   case HK_ISVECTORIZED:
91*20b198ecSSjoerd Meijer   case HK_PREDICATE:
92f2ec16ccSHideki Saito     return (Val == 0 || Val == 1);
93f2ec16ccSHideki Saito   }
94f2ec16ccSHideki Saito   return false;
95f2ec16ccSHideki Saito }
96f2ec16ccSHideki Saito 
97d4eb13c8SMichael Kruse LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
98d4eb13c8SMichael Kruse                                        bool InterleaveOnlyWhenForced,
99f2ec16ccSHideki Saito                                        OptimizationRemarkEmitter &ORE)
100f2ec16ccSHideki Saito     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
101d4eb13c8SMichael Kruse       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
102f2ec16ccSHideki Saito       Force("vectorize.enable", FK_Undefined, HK_FORCE),
103*20b198ecSSjoerd Meijer       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
104*20b198ecSSjoerd Meijer       Predicate("vectorize.predicate.enable", 0, HK_PREDICATE), TheLoop(L),
105*20b198ecSSjoerd Meijer       ORE(ORE) {
106f2ec16ccSHideki Saito   // Populate values with existing loop metadata.
107f2ec16ccSHideki Saito   getHintsFromMetadata();
108f2ec16ccSHideki Saito 
109f2ec16ccSHideki Saito   // force-vector-interleave overrides DisableInterleaving.
110f2ec16ccSHideki Saito   if (VectorizerParams::isInterleaveForced())
111f2ec16ccSHideki Saito     Interleave.Value = VectorizerParams::VectorizationInterleave;
112f2ec16ccSHideki Saito 
113f2ec16ccSHideki Saito   if (IsVectorized.Value != 1)
114f2ec16ccSHideki Saito     // If the vectorization width and interleaving count are both 1 then
115f2ec16ccSHideki Saito     // consider the loop to have been already vectorized because there's
116f2ec16ccSHideki Saito     // nothing more that we can do.
117f2ec16ccSHideki Saito     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
118d4eb13c8SMichael Kruse   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
119f2ec16ccSHideki Saito              << "LV: Interleaving disabled by the pass manager\n");
120f2ec16ccSHideki Saito }
121f2ec16ccSHideki Saito 
12277a614a6SMichael Kruse void LoopVectorizeHints::setAlreadyVectorized() {
12377a614a6SMichael Kruse   LLVMContext &Context = TheLoop->getHeader()->getContext();
12477a614a6SMichael Kruse 
12577a614a6SMichael Kruse   MDNode *IsVectorizedMD = MDNode::get(
12677a614a6SMichael Kruse       Context,
12777a614a6SMichael Kruse       {MDString::get(Context, "llvm.loop.isvectorized"),
12877a614a6SMichael Kruse        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
12977a614a6SMichael Kruse   MDNode *LoopID = TheLoop->getLoopID();
13077a614a6SMichael Kruse   MDNode *NewLoopID =
13177a614a6SMichael Kruse       makePostTransformationMetadata(Context, LoopID,
13277a614a6SMichael Kruse                                      {Twine(Prefix(), "vectorize.").str(),
13377a614a6SMichael Kruse                                       Twine(Prefix(), "interleave.").str()},
13477a614a6SMichael Kruse                                      {IsVectorizedMD});
13577a614a6SMichael Kruse   TheLoop->setLoopID(NewLoopID);
13677a614a6SMichael Kruse 
13777a614a6SMichael Kruse   // Update internal cache.
13877a614a6SMichael Kruse   IsVectorized.Value = 1;
13977a614a6SMichael Kruse }
14077a614a6SMichael Kruse 
141d4eb13c8SMichael Kruse bool LoopVectorizeHints::allowVectorization(
142d4eb13c8SMichael Kruse     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
143f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled) {
144d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
145f2ec16ccSHideki Saito     emitRemarkWithHints();
146f2ec16ccSHideki Saito     return false;
147f2ec16ccSHideki Saito   }
148f2ec16ccSHideki Saito 
149d4eb13c8SMichael Kruse   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
150d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
151f2ec16ccSHideki Saito     emitRemarkWithHints();
152f2ec16ccSHideki Saito     return false;
153f2ec16ccSHideki Saito   }
154f2ec16ccSHideki Saito 
155f2ec16ccSHideki Saito   if (getIsVectorized() == 1) {
156d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
157f2ec16ccSHideki Saito     // FIXME: Add interleave.disable metadata. This will allow
158f2ec16ccSHideki Saito     // vectorize.disable to be used without disabling the pass and errors
159f2ec16ccSHideki Saito     // to differentiate between disabled vectorization and a width of 1.
160f2ec16ccSHideki Saito     ORE.emit([&]() {
161f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
162f2ec16ccSHideki Saito                                         "AllDisabled", L->getStartLoc(),
163f2ec16ccSHideki Saito                                         L->getHeader())
164f2ec16ccSHideki Saito              << "loop not vectorized: vectorization and interleaving are "
165f2ec16ccSHideki Saito                 "explicitly disabled, or the loop has already been "
166f2ec16ccSHideki Saito                 "vectorized";
167f2ec16ccSHideki Saito     });
168f2ec16ccSHideki Saito     return false;
169f2ec16ccSHideki Saito   }
170f2ec16ccSHideki Saito 
171f2ec16ccSHideki Saito   return true;
172f2ec16ccSHideki Saito }
173f2ec16ccSHideki Saito 
174f2ec16ccSHideki Saito void LoopVectorizeHints::emitRemarkWithHints() const {
175f2ec16ccSHideki Saito   using namespace ore;
176f2ec16ccSHideki Saito 
177f2ec16ccSHideki Saito   ORE.emit([&]() {
178f2ec16ccSHideki Saito     if (Force.Value == LoopVectorizeHints::FK_Disabled)
179f2ec16ccSHideki Saito       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
180f2ec16ccSHideki Saito                                       TheLoop->getStartLoc(),
181f2ec16ccSHideki Saito                                       TheLoop->getHeader())
182f2ec16ccSHideki Saito              << "loop not vectorized: vectorization is explicitly disabled";
183f2ec16ccSHideki Saito     else {
184f2ec16ccSHideki Saito       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
185f2ec16ccSHideki Saito                                  TheLoop->getStartLoc(), TheLoop->getHeader());
186f2ec16ccSHideki Saito       R << "loop not vectorized";
187f2ec16ccSHideki Saito       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
188f2ec16ccSHideki Saito         R << " (Force=" << NV("Force", true);
189f2ec16ccSHideki Saito         if (Width.Value != 0)
190f2ec16ccSHideki Saito           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
191f2ec16ccSHideki Saito         if (Interleave.Value != 0)
192f2ec16ccSHideki Saito           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
193f2ec16ccSHideki Saito         R << ")";
194f2ec16ccSHideki Saito       }
195f2ec16ccSHideki Saito       return R;
196f2ec16ccSHideki Saito     }
197f2ec16ccSHideki Saito   });
198f2ec16ccSHideki Saito }
199f2ec16ccSHideki Saito 
200f2ec16ccSHideki Saito const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
201f2ec16ccSHideki Saito   if (getWidth() == 1)
202f2ec16ccSHideki Saito     return LV_NAME;
203f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled)
204f2ec16ccSHideki Saito     return LV_NAME;
205f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
206f2ec16ccSHideki Saito     return LV_NAME;
207f2ec16ccSHideki Saito   return OptimizationRemarkAnalysis::AlwaysPrint;
208f2ec16ccSHideki Saito }
209f2ec16ccSHideki Saito 
210f2ec16ccSHideki Saito void LoopVectorizeHints::getHintsFromMetadata() {
211f2ec16ccSHideki Saito   MDNode *LoopID = TheLoop->getLoopID();
212f2ec16ccSHideki Saito   if (!LoopID)
213f2ec16ccSHideki Saito     return;
214f2ec16ccSHideki Saito 
215f2ec16ccSHideki Saito   // First operand should refer to the loop id itself.
216f2ec16ccSHideki Saito   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
217f2ec16ccSHideki Saito   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
218f2ec16ccSHideki Saito 
219f2ec16ccSHideki Saito   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
220f2ec16ccSHideki Saito     const MDString *S = nullptr;
221f2ec16ccSHideki Saito     SmallVector<Metadata *, 4> Args;
222f2ec16ccSHideki Saito 
223f2ec16ccSHideki Saito     // The expected hint is either a MDString or a MDNode with the first
224f2ec16ccSHideki Saito     // operand a MDString.
225f2ec16ccSHideki Saito     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
226f2ec16ccSHideki Saito       if (!MD || MD->getNumOperands() == 0)
227f2ec16ccSHideki Saito         continue;
228f2ec16ccSHideki Saito       S = dyn_cast<MDString>(MD->getOperand(0));
229f2ec16ccSHideki Saito       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
230f2ec16ccSHideki Saito         Args.push_back(MD->getOperand(i));
231f2ec16ccSHideki Saito     } else {
232f2ec16ccSHideki Saito       S = dyn_cast<MDString>(LoopID->getOperand(i));
233f2ec16ccSHideki Saito       assert(Args.size() == 0 && "too many arguments for MDString");
234f2ec16ccSHideki Saito     }
235f2ec16ccSHideki Saito 
236f2ec16ccSHideki Saito     if (!S)
237f2ec16ccSHideki Saito       continue;
238f2ec16ccSHideki Saito 
239f2ec16ccSHideki Saito     // Check if the hint starts with the loop metadata prefix.
240f2ec16ccSHideki Saito     StringRef Name = S->getString();
241f2ec16ccSHideki Saito     if (Args.size() == 1)
242f2ec16ccSHideki Saito       setHint(Name, Args[0]);
243f2ec16ccSHideki Saito   }
244f2ec16ccSHideki Saito }
245f2ec16ccSHideki Saito 
246f2ec16ccSHideki Saito void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
247f2ec16ccSHideki Saito   if (!Name.startswith(Prefix()))
248f2ec16ccSHideki Saito     return;
249f2ec16ccSHideki Saito   Name = Name.substr(Prefix().size(), StringRef::npos);
250f2ec16ccSHideki Saito 
251f2ec16ccSHideki Saito   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
252f2ec16ccSHideki Saito   if (!C)
253f2ec16ccSHideki Saito     return;
254f2ec16ccSHideki Saito   unsigned Val = C->getZExtValue();
255f2ec16ccSHideki Saito 
256*20b198ecSSjoerd Meijer   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate};
257f2ec16ccSHideki Saito   for (auto H : Hints) {
258f2ec16ccSHideki Saito     if (Name == H->Name) {
259f2ec16ccSHideki Saito       if (H->validate(Val))
260f2ec16ccSHideki Saito         H->Value = Val;
261f2ec16ccSHideki Saito       else
262d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
263f2ec16ccSHideki Saito       break;
264f2ec16ccSHideki Saito     }
265f2ec16ccSHideki Saito   }
266f2ec16ccSHideki Saito }
267f2ec16ccSHideki Saito 
268f2ec16ccSHideki Saito bool LoopVectorizationRequirements::doesNotMeet(
269f2ec16ccSHideki Saito     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
270f2ec16ccSHideki Saito   const char *PassName = Hints.vectorizeAnalysisPassName();
271f2ec16ccSHideki Saito   bool Failed = false;
272f2ec16ccSHideki Saito   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
273f2ec16ccSHideki Saito     ORE.emit([&]() {
274f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisFPCommute(
275f2ec16ccSHideki Saito                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
276f2ec16ccSHideki Saito                  UnsafeAlgebraInst->getParent())
277f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
278f2ec16ccSHideki Saito                 "floating-point operations";
279f2ec16ccSHideki Saito     });
280f2ec16ccSHideki Saito     Failed = true;
281f2ec16ccSHideki Saito   }
282f2ec16ccSHideki Saito 
283f2ec16ccSHideki Saito   // Test if runtime memcheck thresholds are exceeded.
284f2ec16ccSHideki Saito   bool PragmaThresholdReached =
285f2ec16ccSHideki Saito       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
286f2ec16ccSHideki Saito   bool ThresholdReached =
287f2ec16ccSHideki Saito       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
288f2ec16ccSHideki Saito   if ((ThresholdReached && !Hints.allowReordering()) ||
289f2ec16ccSHideki Saito       PragmaThresholdReached) {
290f2ec16ccSHideki Saito     ORE.emit([&]() {
291f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
292f2ec16ccSHideki Saito                                                 L->getStartLoc(),
293f2ec16ccSHideki Saito                                                 L->getHeader())
294f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
295f2ec16ccSHideki Saito                 "memory operations";
296f2ec16ccSHideki Saito     });
297d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
298f2ec16ccSHideki Saito     Failed = true;
299f2ec16ccSHideki Saito   }
300f2ec16ccSHideki Saito 
301f2ec16ccSHideki Saito   return Failed;
302f2ec16ccSHideki Saito }
303f2ec16ccSHideki Saito 
304f2ec16ccSHideki Saito // Return true if the inner loop \p Lp is uniform with regard to the outer loop
305f2ec16ccSHideki Saito // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
306f2ec16ccSHideki Saito // executing the inner loop will execute the same iterations). This check is
307f2ec16ccSHideki Saito // very constrained for now but it will be relaxed in the future. \p Lp is
308f2ec16ccSHideki Saito // considered uniform if it meets all the following conditions:
309f2ec16ccSHideki Saito //   1) it has a canonical IV (starting from 0 and with stride 1),
310f2ec16ccSHideki Saito //   2) its latch terminator is a conditional branch and,
311f2ec16ccSHideki Saito //   3) its latch condition is a compare instruction whose operands are the
312f2ec16ccSHideki Saito //      canonical IV and an OuterLp invariant.
313f2ec16ccSHideki Saito // This check doesn't take into account the uniformity of other conditions not
314f2ec16ccSHideki Saito // related to the loop latch because they don't affect the loop uniformity.
315f2ec16ccSHideki Saito //
316f2ec16ccSHideki Saito // NOTE: We decided to keep all these checks and its associated documentation
317f2ec16ccSHideki Saito // together so that we can easily have a picture of the current supported loop
318f2ec16ccSHideki Saito // nests. However, some of the current checks don't depend on \p OuterLp and
319f2ec16ccSHideki Saito // would be redundantly executed for each \p Lp if we invoked this function for
320f2ec16ccSHideki Saito // different candidate outer loops. This is not the case for now because we
321f2ec16ccSHideki Saito // don't currently have the infrastructure to evaluate multiple candidate outer
322f2ec16ccSHideki Saito // loops and \p OuterLp will be a fixed parameter while we only support explicit
323f2ec16ccSHideki Saito // outer loop vectorization. It's also very likely that these checks go away
324f2ec16ccSHideki Saito // before introducing the aforementioned infrastructure. However, if this is not
325f2ec16ccSHideki Saito // the case, we should move the \p OuterLp independent checks to a separate
326f2ec16ccSHideki Saito // function that is only executed once for each \p Lp.
327f2ec16ccSHideki Saito static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
328f2ec16ccSHideki Saito   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
329f2ec16ccSHideki Saito 
330f2ec16ccSHideki Saito   // If Lp is the outer loop, it's uniform by definition.
331f2ec16ccSHideki Saito   if (Lp == OuterLp)
332f2ec16ccSHideki Saito     return true;
333f2ec16ccSHideki Saito   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
334f2ec16ccSHideki Saito 
335f2ec16ccSHideki Saito   // 1.
336f2ec16ccSHideki Saito   PHINode *IV = Lp->getCanonicalInductionVariable();
337f2ec16ccSHideki Saito   if (!IV) {
338d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
339f2ec16ccSHideki Saito     return false;
340f2ec16ccSHideki Saito   }
341f2ec16ccSHideki Saito 
342f2ec16ccSHideki Saito   // 2.
343f2ec16ccSHideki Saito   BasicBlock *Latch = Lp->getLoopLatch();
344f2ec16ccSHideki Saito   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
345f2ec16ccSHideki Saito   if (!LatchBr || LatchBr->isUnconditional()) {
346d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
347f2ec16ccSHideki Saito     return false;
348f2ec16ccSHideki Saito   }
349f2ec16ccSHideki Saito 
350f2ec16ccSHideki Saito   // 3.
351f2ec16ccSHideki Saito   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
352f2ec16ccSHideki Saito   if (!LatchCmp) {
353d34e60caSNicola Zaghen     LLVM_DEBUG(
354d34e60caSNicola Zaghen         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
355f2ec16ccSHideki Saito     return false;
356f2ec16ccSHideki Saito   }
357f2ec16ccSHideki Saito 
358f2ec16ccSHideki Saito   Value *CondOp0 = LatchCmp->getOperand(0);
359f2ec16ccSHideki Saito   Value *CondOp1 = LatchCmp->getOperand(1);
360f2ec16ccSHideki Saito   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
361f2ec16ccSHideki Saito   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
362f2ec16ccSHideki Saito       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
363d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
364f2ec16ccSHideki Saito     return false;
365f2ec16ccSHideki Saito   }
366f2ec16ccSHideki Saito 
367f2ec16ccSHideki Saito   return true;
368f2ec16ccSHideki Saito }
369f2ec16ccSHideki Saito 
370f2ec16ccSHideki Saito // Return true if \p Lp and all its nested loops are uniform with regard to \p
371f2ec16ccSHideki Saito // OuterLp.
372f2ec16ccSHideki Saito static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
373f2ec16ccSHideki Saito   if (!isUniformLoop(Lp, OuterLp))
374f2ec16ccSHideki Saito     return false;
375f2ec16ccSHideki Saito 
376f2ec16ccSHideki Saito   // Check if nested loops are uniform.
377f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
378f2ec16ccSHideki Saito     if (!isUniformLoopNest(SubLp, OuterLp))
379f2ec16ccSHideki Saito       return false;
380f2ec16ccSHideki Saito 
381f2ec16ccSHideki Saito   return true;
382f2ec16ccSHideki Saito }
383f2ec16ccSHideki Saito 
3845f8f34e4SAdrian Prantl /// Check whether it is safe to if-convert this phi node.
385f2ec16ccSHideki Saito ///
386f2ec16ccSHideki Saito /// Phi nodes with constant expressions that can trap are not safe to if
387f2ec16ccSHideki Saito /// convert.
388f2ec16ccSHideki Saito static bool canIfConvertPHINodes(BasicBlock *BB) {
389f2ec16ccSHideki Saito   for (PHINode &Phi : BB->phis()) {
390f2ec16ccSHideki Saito     for (Value *V : Phi.incoming_values())
391f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(V))
392f2ec16ccSHideki Saito         if (C->canTrap())
393f2ec16ccSHideki Saito           return false;
394f2ec16ccSHideki Saito   }
395f2ec16ccSHideki Saito   return true;
396f2ec16ccSHideki Saito }
397f2ec16ccSHideki Saito 
398f2ec16ccSHideki Saito static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
399f2ec16ccSHideki Saito   if (Ty->isPointerTy())
400f2ec16ccSHideki Saito     return DL.getIntPtrType(Ty);
401f2ec16ccSHideki Saito 
402f2ec16ccSHideki Saito   // It is possible that char's or short's overflow when we ask for the loop's
403f2ec16ccSHideki Saito   // trip count, work around this by changing the type size.
404f2ec16ccSHideki Saito   if (Ty->getScalarSizeInBits() < 32)
405f2ec16ccSHideki Saito     return Type::getInt32Ty(Ty->getContext());
406f2ec16ccSHideki Saito 
407f2ec16ccSHideki Saito   return Ty;
408f2ec16ccSHideki Saito }
409f2ec16ccSHideki Saito 
410f2ec16ccSHideki Saito static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
411f2ec16ccSHideki Saito   Ty0 = convertPointerToIntegerType(DL, Ty0);
412f2ec16ccSHideki Saito   Ty1 = convertPointerToIntegerType(DL, Ty1);
413f2ec16ccSHideki Saito   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
414f2ec16ccSHideki Saito     return Ty0;
415f2ec16ccSHideki Saito   return Ty1;
416f2ec16ccSHideki Saito }
417f2ec16ccSHideki Saito 
4185f8f34e4SAdrian Prantl /// Check that the instruction has outside loop users and is not an
419f2ec16ccSHideki Saito /// identified reduction variable.
420f2ec16ccSHideki Saito static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
421f2ec16ccSHideki Saito                                SmallPtrSetImpl<Value *> &AllowedExit) {
42260a1e4ddSAnna Thomas   // Reductions, Inductions and non-header phis are allowed to have exit users. All
423f2ec16ccSHideki Saito   // other instructions must not have external users.
424f2ec16ccSHideki Saito   if (!AllowedExit.count(Inst))
425f2ec16ccSHideki Saito     // Check that all of the users of the loop are inside the BB.
426f2ec16ccSHideki Saito     for (User *U : Inst->users()) {
427f2ec16ccSHideki Saito       Instruction *UI = cast<Instruction>(U);
428f2ec16ccSHideki Saito       // This user may be a reduction exit value.
429f2ec16ccSHideki Saito       if (!TheLoop->contains(UI)) {
430d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
431f2ec16ccSHideki Saito         return true;
432f2ec16ccSHideki Saito       }
433f2ec16ccSHideki Saito     }
434f2ec16ccSHideki Saito   return false;
435f2ec16ccSHideki Saito }
436f2ec16ccSHideki Saito 
437f2ec16ccSHideki Saito int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
438f2ec16ccSHideki Saito   const ValueToValueMap &Strides =
439f2ec16ccSHideki Saito       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
440f2ec16ccSHideki Saito 
441f2ec16ccSHideki Saito   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
442f2ec16ccSHideki Saito   if (Stride == 1 || Stride == -1)
443f2ec16ccSHideki Saito     return Stride;
444f2ec16ccSHideki Saito   return 0;
445f2ec16ccSHideki Saito }
446f2ec16ccSHideki Saito 
447f2ec16ccSHideki Saito bool LoopVectorizationLegality::isUniform(Value *V) {
448f2ec16ccSHideki Saito   return LAI->isUniform(V);
449f2ec16ccSHideki Saito }
450f2ec16ccSHideki Saito 
4519e97caf5SRenato Golin void LoopVectorizationLegality::reportVectorizationFailure(
4529e97caf5SRenato Golin     const StringRef DebugMsg, const StringRef OREMsg,
4539e97caf5SRenato Golin     const StringRef ORETag, Instruction *I) const {
4549e97caf5SRenato Golin   LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I));
4559e97caf5SRenato Golin   ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
4569e97caf5SRenato Golin       ORETag, TheLoop, I) << OREMsg);
4579e97caf5SRenato Golin }
4589e97caf5SRenato Golin 
459f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeOuterLoop() {
460f2ec16ccSHideki Saito   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
461f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
462f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
463f2ec16ccSHideki Saito   bool Result = true;
464f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
465f2ec16ccSHideki Saito 
466f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
467f2ec16ccSHideki Saito     // Check whether the BB terminator is a BranchInst. Any other terminator is
468f2ec16ccSHideki Saito     // not supported yet.
469f2ec16ccSHideki Saito     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
470f2ec16ccSHideki Saito     if (!Br) {
4719e97caf5SRenato Golin       reportVectorizationFailure("Unsupported basic block terminator",
4729e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
4739e97caf5SRenato Golin           "CFGNotUnderstood");
474f2ec16ccSHideki Saito       if (DoExtraAnalysis)
475f2ec16ccSHideki Saito         Result = false;
476f2ec16ccSHideki Saito       else
477f2ec16ccSHideki Saito         return false;
478f2ec16ccSHideki Saito     }
479f2ec16ccSHideki Saito 
480f2ec16ccSHideki Saito     // Check whether the BranchInst is a supported one. Only unconditional
481f2ec16ccSHideki Saito     // branches, conditional branches with an outer loop invariant condition or
482f2ec16ccSHideki Saito     // backedges are supported.
4834e4ecae0SHideki Saito     // FIXME: We skip these checks when VPlan predication is enabled as we
4844e4ecae0SHideki Saito     // want to allow divergent branches. This whole check will be removed
4854e4ecae0SHideki Saito     // once VPlan predication is on by default.
4864e4ecae0SHideki Saito     if (!EnableVPlanPredication && Br && Br->isConditional() &&
487f2ec16ccSHideki Saito         !TheLoop->isLoopInvariant(Br->getCondition()) &&
488f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(0)) &&
489f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(1))) {
4909e97caf5SRenato Golin       reportVectorizationFailure("Unsupported conditional branch",
4919e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
4929e97caf5SRenato Golin           "CFGNotUnderstood");
493f2ec16ccSHideki Saito       if (DoExtraAnalysis)
494f2ec16ccSHideki Saito         Result = false;
495f2ec16ccSHideki Saito       else
496f2ec16ccSHideki Saito         return false;
497f2ec16ccSHideki Saito     }
498f2ec16ccSHideki Saito   }
499f2ec16ccSHideki Saito 
500f2ec16ccSHideki Saito   // Check whether inner loops are uniform. At this point, we only support
501f2ec16ccSHideki Saito   // simple outer loops scenarios with uniform nested loops.
502f2ec16ccSHideki Saito   if (!isUniformLoopNest(TheLoop /*loop nest*/,
503f2ec16ccSHideki Saito                          TheLoop /*context outer loop*/)) {
5049e97caf5SRenato Golin     reportVectorizationFailure("Outer loop contains divergent loops",
5059e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
5069e97caf5SRenato Golin         "CFGNotUnderstood");
507f2ec16ccSHideki Saito     if (DoExtraAnalysis)
508f2ec16ccSHideki Saito       Result = false;
509f2ec16ccSHideki Saito     else
510f2ec16ccSHideki Saito       return false;
511f2ec16ccSHideki Saito   }
512f2ec16ccSHideki Saito 
513ea7f3035SHideki Saito   // Check whether we are able to set up outer loop induction.
514ea7f3035SHideki Saito   if (!setupOuterLoopInductions()) {
5159e97caf5SRenato Golin     reportVectorizationFailure("Unsupported outer loop Phi(s)",
5169e97caf5SRenato Golin                                "Unsupported outer loop Phi(s)",
5179e97caf5SRenato Golin                                "UnsupportedPhi");
518ea7f3035SHideki Saito     if (DoExtraAnalysis)
519ea7f3035SHideki Saito       Result = false;
520ea7f3035SHideki Saito     else
521ea7f3035SHideki Saito       return false;
522ea7f3035SHideki Saito   }
523ea7f3035SHideki Saito 
524f2ec16ccSHideki Saito   return Result;
525f2ec16ccSHideki Saito }
526f2ec16ccSHideki Saito 
527f2ec16ccSHideki Saito void LoopVectorizationLegality::addInductionPhi(
528f2ec16ccSHideki Saito     PHINode *Phi, const InductionDescriptor &ID,
529f2ec16ccSHideki Saito     SmallPtrSetImpl<Value *> &AllowedExit) {
530f2ec16ccSHideki Saito   Inductions[Phi] = ID;
531f2ec16ccSHideki Saito 
532f2ec16ccSHideki Saito   // In case this induction also comes with casts that we know we can ignore
533f2ec16ccSHideki Saito   // in the vectorized loop body, record them here. All casts could be recorded
534f2ec16ccSHideki Saito   // here for ignoring, but suffices to record only the first (as it is the
535f2ec16ccSHideki Saito   // only one that may bw used outside the cast sequence).
536f2ec16ccSHideki Saito   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
537f2ec16ccSHideki Saito   if (!Casts.empty())
538f2ec16ccSHideki Saito     InductionCastsToIgnore.insert(*Casts.begin());
539f2ec16ccSHideki Saito 
540f2ec16ccSHideki Saito   Type *PhiTy = Phi->getType();
541f2ec16ccSHideki Saito   const DataLayout &DL = Phi->getModule()->getDataLayout();
542f2ec16ccSHideki Saito 
543f2ec16ccSHideki Saito   // Get the widest type.
544f2ec16ccSHideki Saito   if (!PhiTy->isFloatingPointTy()) {
545f2ec16ccSHideki Saito     if (!WidestIndTy)
546f2ec16ccSHideki Saito       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
547f2ec16ccSHideki Saito     else
548f2ec16ccSHideki Saito       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
549f2ec16ccSHideki Saito   }
550f2ec16ccSHideki Saito 
551f2ec16ccSHideki Saito   // Int inductions are special because we only allow one IV.
552f2ec16ccSHideki Saito   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
553f2ec16ccSHideki Saito       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
554f2ec16ccSHideki Saito       isa<Constant>(ID.getStartValue()) &&
555f2ec16ccSHideki Saito       cast<Constant>(ID.getStartValue())->isNullValue()) {
556f2ec16ccSHideki Saito 
557f2ec16ccSHideki Saito     // Use the phi node with the widest type as induction. Use the last
558f2ec16ccSHideki Saito     // one if there are multiple (no good reason for doing this other
559f2ec16ccSHideki Saito     // than it is expedient). We've checked that it begins at zero and
560f2ec16ccSHideki Saito     // steps by one, so this is a canonical induction variable.
561f2ec16ccSHideki Saito     if (!PrimaryInduction || PhiTy == WidestIndTy)
562f2ec16ccSHideki Saito       PrimaryInduction = Phi;
563f2ec16ccSHideki Saito   }
564f2ec16ccSHideki Saito 
565f2ec16ccSHideki Saito   // Both the PHI node itself, and the "post-increment" value feeding
566f2ec16ccSHideki Saito   // back into the PHI node may have external users.
567f2ec16ccSHideki Saito   // We can allow those uses, except if the SCEVs we have for them rely
568f2ec16ccSHideki Saito   // on predicates that only hold within the loop, since allowing the exit
5696a1dd77fSAnna Thomas   // currently means re-using this SCEV outside the loop (see PR33706 for more
5706a1dd77fSAnna Thomas   // details).
571f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().isAlwaysTrue()) {
572f2ec16ccSHideki Saito     AllowedExit.insert(Phi);
573f2ec16ccSHideki Saito     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
574f2ec16ccSHideki Saito   }
575f2ec16ccSHideki Saito 
576d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
577f2ec16ccSHideki Saito }
578f2ec16ccSHideki Saito 
579ea7f3035SHideki Saito bool LoopVectorizationLegality::setupOuterLoopInductions() {
580ea7f3035SHideki Saito   BasicBlock *Header = TheLoop->getHeader();
581ea7f3035SHideki Saito 
582ea7f3035SHideki Saito   // Returns true if a given Phi is a supported induction.
583ea7f3035SHideki Saito   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
584ea7f3035SHideki Saito     InductionDescriptor ID;
585ea7f3035SHideki Saito     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
586ea7f3035SHideki Saito         ID.getKind() == InductionDescriptor::IK_IntInduction) {
587ea7f3035SHideki Saito       addInductionPhi(&Phi, ID, AllowedExit);
588ea7f3035SHideki Saito       return true;
589ea7f3035SHideki Saito     } else {
590ea7f3035SHideki Saito       // Bail out for any Phi in the outer loop header that is not a supported
591ea7f3035SHideki Saito       // induction.
592ea7f3035SHideki Saito       LLVM_DEBUG(
593ea7f3035SHideki Saito           dbgs()
594ea7f3035SHideki Saito           << "LV: Found unsupported PHI for outer loop vectorization.\n");
595ea7f3035SHideki Saito       return false;
596ea7f3035SHideki Saito     }
597ea7f3035SHideki Saito   };
598ea7f3035SHideki Saito 
599ea7f3035SHideki Saito   if (llvm::all_of(Header->phis(), isSupportedPhi))
600ea7f3035SHideki Saito     return true;
601ea7f3035SHideki Saito   else
602ea7f3035SHideki Saito     return false;
603ea7f3035SHideki Saito }
604ea7f3035SHideki Saito 
605f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeInstrs() {
606f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
607f2ec16ccSHideki Saito 
608f2ec16ccSHideki Saito   // Look for the attribute signaling the absence of NaNs.
609f2ec16ccSHideki Saito   Function &F = *Header->getParent();
610f2ec16ccSHideki Saito   HasFunNoNaNAttr =
611f2ec16ccSHideki Saito       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
612f2ec16ccSHideki Saito 
613f2ec16ccSHideki Saito   // For each block in the loop.
614f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
615f2ec16ccSHideki Saito     // Scan the instructions in the block and look for hazards.
616f2ec16ccSHideki Saito     for (Instruction &I : *BB) {
617f2ec16ccSHideki Saito       if (auto *Phi = dyn_cast<PHINode>(&I)) {
618f2ec16ccSHideki Saito         Type *PhiTy = Phi->getType();
619f2ec16ccSHideki Saito         // Check that this PHI type is allowed.
620f2ec16ccSHideki Saito         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
621f2ec16ccSHideki Saito             !PhiTy->isPointerTy()) {
6229e97caf5SRenato Golin           reportVectorizationFailure("Found a non-int non-pointer PHI",
6239e97caf5SRenato Golin                                      "loop control flow is not understood by vectorizer",
6249e97caf5SRenato Golin                                      "CFGNotUnderstood");
625f2ec16ccSHideki Saito           return false;
626f2ec16ccSHideki Saito         }
627f2ec16ccSHideki Saito 
628f2ec16ccSHideki Saito         // If this PHINode is not in the header block, then we know that we
629f2ec16ccSHideki Saito         // can convert it to select during if-conversion. No need to check if
630f2ec16ccSHideki Saito         // the PHIs in this block are induction or reduction variables.
631f2ec16ccSHideki Saito         if (BB != Header) {
63260a1e4ddSAnna Thomas           // Non-header phi nodes that have outside uses can be vectorized. Add
63360a1e4ddSAnna Thomas           // them to the list of allowed exits.
63460a1e4ddSAnna Thomas           // Unsafe cyclic dependencies with header phis are identified during
63560a1e4ddSAnna Thomas           // legalization for reduction, induction and first order
63660a1e4ddSAnna Thomas           // recurrences.
637f2ec16ccSHideki Saito           continue;
638f2ec16ccSHideki Saito         }
639f2ec16ccSHideki Saito 
640f2ec16ccSHideki Saito         // We only allow if-converted PHIs with exactly two incoming values.
641f2ec16ccSHideki Saito         if (Phi->getNumIncomingValues() != 2) {
6429e97caf5SRenato Golin           reportVectorizationFailure("Found an invalid PHI",
6439e97caf5SRenato Golin               "loop control flow is not understood by vectorizer",
6449e97caf5SRenato Golin               "CFGNotUnderstood", Phi);
645f2ec16ccSHideki Saito           return false;
646f2ec16ccSHideki Saito         }
647f2ec16ccSHideki Saito 
648f2ec16ccSHideki Saito         RecurrenceDescriptor RedDes;
649f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
650f2ec16ccSHideki Saito                                                  DT)) {
651f2ec16ccSHideki Saito           if (RedDes.hasUnsafeAlgebra())
652f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
653f2ec16ccSHideki Saito           AllowedExit.insert(RedDes.getLoopExitInstr());
654f2ec16ccSHideki Saito           Reductions[Phi] = RedDes;
655f2ec16ccSHideki Saito           continue;
656f2ec16ccSHideki Saito         }
657f2ec16ccSHideki Saito 
658b02b0ad8SAnna Thomas         // TODO: Instead of recording the AllowedExit, it would be good to record the
659b02b0ad8SAnna Thomas         // complementary set: NotAllowedExit. These include (but may not be
660b02b0ad8SAnna Thomas         // limited to):
661b02b0ad8SAnna Thomas         // 1. Reduction phis as they represent the one-before-last value, which
662b02b0ad8SAnna Thomas         // is not available when vectorized
663b02b0ad8SAnna Thomas         // 2. Induction phis and increment when SCEV predicates cannot be used
664b02b0ad8SAnna Thomas         // outside the loop - see addInductionPhi
665b02b0ad8SAnna Thomas         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
666b02b0ad8SAnna Thomas         // outside the loop - see call to hasOutsideLoopUser in the non-phi
667b02b0ad8SAnna Thomas         // handling below
668b02b0ad8SAnna Thomas         // 4. FirstOrderRecurrence phis that can possibly be handled by
669b02b0ad8SAnna Thomas         // extraction.
670b02b0ad8SAnna Thomas         // By recording these, we can then reason about ways to vectorize each
671b02b0ad8SAnna Thomas         // of these NotAllowedExit.
672f2ec16ccSHideki Saito         InductionDescriptor ID;
673f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
674f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
675f2ec16ccSHideki Saito           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
676f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
677f2ec16ccSHideki Saito           continue;
678f2ec16ccSHideki Saito         }
679f2ec16ccSHideki Saito 
680f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
681f2ec16ccSHideki Saito                                                          SinkAfter, DT)) {
682f2ec16ccSHideki Saito           FirstOrderRecurrences.insert(Phi);
683f2ec16ccSHideki Saito           continue;
684f2ec16ccSHideki Saito         }
685f2ec16ccSHideki Saito 
686f2ec16ccSHideki Saito         // As a last resort, coerce the PHI to a AddRec expression
687f2ec16ccSHideki Saito         // and re-try classifying it a an induction PHI.
688f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
689f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
690f2ec16ccSHideki Saito           continue;
691f2ec16ccSHideki Saito         }
692f2ec16ccSHideki Saito 
6939e97caf5SRenato Golin         reportVectorizationFailure("Found an unidentified PHI",
6949e97caf5SRenato Golin             "value that could not be identified as "
6959e97caf5SRenato Golin             "reduction is used outside the loop",
6969e97caf5SRenato Golin             "NonReductionValueUsedOutsideLoop", Phi);
697f2ec16ccSHideki Saito         return false;
698f2ec16ccSHideki Saito       } // end of PHI handling
699f2ec16ccSHideki Saito 
700f2ec16ccSHideki Saito       // We handle calls that:
701f2ec16ccSHideki Saito       //   * Are debug info intrinsics.
702f2ec16ccSHideki Saito       //   * Have a mapping to an IR intrinsic.
703f2ec16ccSHideki Saito       //   * Have a vector version available.
704f2ec16ccSHideki Saito       auto *CI = dyn_cast<CallInst>(&I);
705f2ec16ccSHideki Saito       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
706f2ec16ccSHideki Saito           !isa<DbgInfoIntrinsic>(CI) &&
707f2ec16ccSHideki Saito           !(CI->getCalledFunction() && TLI &&
708f2ec16ccSHideki Saito             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
7097d65fe5cSSanjay Patel         // If the call is a recognized math libary call, it is likely that
7107d65fe5cSSanjay Patel         // we can vectorize it given loosened floating-point constraints.
7117d65fe5cSSanjay Patel         LibFunc Func;
7127d65fe5cSSanjay Patel         bool IsMathLibCall =
7137d65fe5cSSanjay Patel             TLI && CI->getCalledFunction() &&
7147d65fe5cSSanjay Patel             CI->getType()->isFloatingPointTy() &&
7157d65fe5cSSanjay Patel             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
7167d65fe5cSSanjay Patel             TLI->hasOptimizedCodeGen(Func);
7177d65fe5cSSanjay Patel 
7187d65fe5cSSanjay Patel         if (IsMathLibCall) {
7197d65fe5cSSanjay Patel           // TODO: Ideally, we should not use clang-specific language here,
7207d65fe5cSSanjay Patel           // but it's hard to provide meaningful yet generic advice.
7217d65fe5cSSanjay Patel           // Also, should this be guarded by allowExtraAnalysis() and/or be part
7227d65fe5cSSanjay Patel           // of the returned info from isFunctionVectorizable()?
7239e97caf5SRenato Golin           reportVectorizationFailure("Found a non-intrinsic callsite",
7249e97caf5SRenato Golin               "library call cannot be vectorized. "
7257d65fe5cSSanjay Patel               "Try compiling with -fno-math-errno, -ffast-math, "
7269e97caf5SRenato Golin               "or similar flags",
7279e97caf5SRenato Golin               "CantVectorizeLibcall", CI);
7287d65fe5cSSanjay Patel         } else {
7299e97caf5SRenato Golin           reportVectorizationFailure("Found a non-intrinsic callsite",
7309e97caf5SRenato Golin                                      "call instruction cannot be vectorized",
7319e97caf5SRenato Golin                                      "CantVectorizeLibcall", CI);
7327d65fe5cSSanjay Patel         }
733f2ec16ccSHideki Saito         return false;
734f2ec16ccSHideki Saito       }
735f2ec16ccSHideki Saito 
736a066f1f9SSimon Pilgrim       // Some intrinsics have scalar arguments and should be same in order for
737a066f1f9SSimon Pilgrim       // them to be vectorized (i.e. loop invariant).
738a066f1f9SSimon Pilgrim       if (CI) {
739f2ec16ccSHideki Saito         auto *SE = PSE.getSE();
740a066f1f9SSimon Pilgrim         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
741a066f1f9SSimon Pilgrim         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
742a066f1f9SSimon Pilgrim           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
743a066f1f9SSimon Pilgrim             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
7449e97caf5SRenato Golin               reportVectorizationFailure("Found unvectorizable intrinsic",
7459e97caf5SRenato Golin                   "intrinsic instruction cannot be vectorized",
7469e97caf5SRenato Golin                   "CantVectorizeIntrinsic", CI);
747f2ec16ccSHideki Saito               return false;
748f2ec16ccSHideki Saito             }
749f2ec16ccSHideki Saito           }
750a066f1f9SSimon Pilgrim       }
751f2ec16ccSHideki Saito 
752f2ec16ccSHideki Saito       // Check that the instruction return type is vectorizable.
753f2ec16ccSHideki Saito       // Also, we can't vectorize extractelement instructions.
754f2ec16ccSHideki Saito       if ((!VectorType::isValidElementType(I.getType()) &&
755f2ec16ccSHideki Saito            !I.getType()->isVoidTy()) ||
756f2ec16ccSHideki Saito           isa<ExtractElementInst>(I)) {
7579e97caf5SRenato Golin         reportVectorizationFailure("Found unvectorizable type",
7589e97caf5SRenato Golin             "instruction return type cannot be vectorized",
7599e97caf5SRenato Golin             "CantVectorizeInstructionReturnType", &I);
760f2ec16ccSHideki Saito         return false;
761f2ec16ccSHideki Saito       }
762f2ec16ccSHideki Saito 
763f2ec16ccSHideki Saito       // Check that the stored type is vectorizable.
764f2ec16ccSHideki Saito       if (auto *ST = dyn_cast<StoreInst>(&I)) {
765f2ec16ccSHideki Saito         Type *T = ST->getValueOperand()->getType();
766f2ec16ccSHideki Saito         if (!VectorType::isValidElementType(T)) {
7679e97caf5SRenato Golin           reportVectorizationFailure("Store instruction cannot be vectorized",
7689e97caf5SRenato Golin                                      "store instruction cannot be vectorized",
7699e97caf5SRenato Golin                                      "CantVectorizeStore", ST);
770f2ec16ccSHideki Saito           return false;
771f2ec16ccSHideki Saito         }
772f2ec16ccSHideki Saito 
7736452bdd2SWarren Ristow         // For nontemporal stores, check that a nontemporal vector version is
7746452bdd2SWarren Ristow         // supported on the target.
7756452bdd2SWarren Ristow         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
7766452bdd2SWarren Ristow           // Arbitrarily try a vector of 2 elements.
7776452bdd2SWarren Ristow           Type *VecTy = VectorType::get(T, /*NumElements=*/2);
7786452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of stored type");
7796452bdd2SWarren Ristow           unsigned Alignment = getLoadStoreAlignment(ST);
7806452bdd2SWarren Ristow           if (!TTI->isLegalNTStore(VecTy, Alignment)) {
7816452bdd2SWarren Ristow             reportVectorizationFailure(
7826452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
7836452bdd2SWarren Ristow                 "nontemporal store instruction cannot be vectorized",
7846452bdd2SWarren Ristow                 "CantVectorizeNontemporalStore", ST);
7856452bdd2SWarren Ristow             return false;
7866452bdd2SWarren Ristow           }
7876452bdd2SWarren Ristow         }
7886452bdd2SWarren Ristow 
7896452bdd2SWarren Ristow       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
7906452bdd2SWarren Ristow         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
7916452bdd2SWarren Ristow           // For nontemporal loads, check that a nontemporal vector version is
7926452bdd2SWarren Ristow           // supported on the target (arbitrarily try a vector of 2 elements).
7936452bdd2SWarren Ristow           Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2);
7946452bdd2SWarren Ristow           assert(VecTy && "did not find vectorized version of load type");
7956452bdd2SWarren Ristow           unsigned Alignment = getLoadStoreAlignment(LD);
7966452bdd2SWarren Ristow           if (!TTI->isLegalNTLoad(VecTy, Alignment)) {
7976452bdd2SWarren Ristow             reportVectorizationFailure(
7986452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
7996452bdd2SWarren Ristow                 "nontemporal load instruction cannot be vectorized",
8006452bdd2SWarren Ristow                 "CantVectorizeNontemporalLoad", LD);
8016452bdd2SWarren Ristow             return false;
8026452bdd2SWarren Ristow           }
8036452bdd2SWarren Ristow         }
8046452bdd2SWarren Ristow 
805f2ec16ccSHideki Saito         // FP instructions can allow unsafe algebra, thus vectorizable by
806f2ec16ccSHideki Saito         // non-IEEE-754 compliant SIMD units.
807f2ec16ccSHideki Saito         // This applies to floating-point math operations and calls, not memory
808f2ec16ccSHideki Saito         // operations, shuffles, or casts, as they don't change precision or
809f2ec16ccSHideki Saito         // semantics.
810f2ec16ccSHideki Saito       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
811f2ec16ccSHideki Saito                  !I.isFast()) {
812d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
813f2ec16ccSHideki Saito         Hints->setPotentiallyUnsafe();
814f2ec16ccSHideki Saito       }
815f2ec16ccSHideki Saito 
816f2ec16ccSHideki Saito       // Reduction instructions are allowed to have exit users.
817f2ec16ccSHideki Saito       // All other instructions must not have external users.
818f2ec16ccSHideki Saito       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
819b02b0ad8SAnna Thomas         // We can safely vectorize loops where instructions within the loop are
820b02b0ad8SAnna Thomas         // used outside the loop only if the SCEV predicates within the loop is
821b02b0ad8SAnna Thomas         // same as outside the loop. Allowing the exit means reusing the SCEV
822b02b0ad8SAnna Thomas         // outside the loop.
823b02b0ad8SAnna Thomas         if (PSE.getUnionPredicate().isAlwaysTrue()) {
824b02b0ad8SAnna Thomas           AllowedExit.insert(&I);
825b02b0ad8SAnna Thomas           continue;
826b02b0ad8SAnna Thomas         }
8279e97caf5SRenato Golin         reportVectorizationFailure("Value cannot be used outside the loop",
8289e97caf5SRenato Golin                                    "value cannot be used outside the loop",
8299e97caf5SRenato Golin                                    "ValueUsedOutsideLoop", &I);
830f2ec16ccSHideki Saito         return false;
831f2ec16ccSHideki Saito       }
832f2ec16ccSHideki Saito     } // next instr.
833f2ec16ccSHideki Saito   }
834f2ec16ccSHideki Saito 
835f2ec16ccSHideki Saito   if (!PrimaryInduction) {
836f2ec16ccSHideki Saito     if (Inductions.empty()) {
8379e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8389e97caf5SRenato Golin           "loop induction variable could not be identified",
8399e97caf5SRenato Golin           "NoInductionVariable");
840f2ec16ccSHideki Saito       return false;
8414f27730eSWarren Ristow     } else if (!WidestIndTy) {
8429e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8439e97caf5SRenato Golin           "integer loop induction variable could not be identified",
8449e97caf5SRenato Golin           "NoIntegerInductionVariable");
8454f27730eSWarren Ristow       return false;
8469e97caf5SRenato Golin     } else {
8479e97caf5SRenato Golin       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
848f2ec16ccSHideki Saito     }
849f2ec16ccSHideki Saito   }
850f2ec16ccSHideki Saito 
851f2ec16ccSHideki Saito   // Now we know the widest induction type, check if our found induction
852f2ec16ccSHideki Saito   // is the same size. If it's not, unset it here and InnerLoopVectorizer
853f2ec16ccSHideki Saito   // will create another.
854f2ec16ccSHideki Saito   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
855f2ec16ccSHideki Saito     PrimaryInduction = nullptr;
856f2ec16ccSHideki Saito 
857f2ec16ccSHideki Saito   return true;
858f2ec16ccSHideki Saito }
859f2ec16ccSHideki Saito 
860f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeMemory() {
861f2ec16ccSHideki Saito   LAI = &(*GetLAA)(*TheLoop);
862f2ec16ccSHideki Saito   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
863f2ec16ccSHideki Saito   if (LAR) {
864f2ec16ccSHideki Saito     ORE->emit([&]() {
865f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
866f2ec16ccSHideki Saito                                         "loop not vectorized: ", *LAR);
867f2ec16ccSHideki Saito     });
868f2ec16ccSHideki Saito   }
869f2ec16ccSHideki Saito   if (!LAI->canVectorizeMemory())
870f2ec16ccSHideki Saito     return false;
871f2ec16ccSHideki Saito 
8725e9215f0SAnna Thomas   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
8739e97caf5SRenato Golin     reportVectorizationFailure("Stores to a uniform address",
8749e97caf5SRenato Golin         "write to a loop invariant address could not be vectorized",
8759e97caf5SRenato Golin         "CantVectorizeStoreToLoopInvariantAddress");
876f2ec16ccSHideki Saito     return false;
877f2ec16ccSHideki Saito   }
878f2ec16ccSHideki Saito   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
879f2ec16ccSHideki Saito   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
880f2ec16ccSHideki Saito 
881f2ec16ccSHideki Saito   return true;
882f2ec16ccSHideki Saito }
883f2ec16ccSHideki Saito 
884f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
885f2ec16ccSHideki Saito   Value *In0 = const_cast<Value *>(V);
886f2ec16ccSHideki Saito   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
887f2ec16ccSHideki Saito   if (!PN)
888f2ec16ccSHideki Saito     return false;
889f2ec16ccSHideki Saito 
890f2ec16ccSHideki Saito   return Inductions.count(PN);
891f2ec16ccSHideki Saito }
892f2ec16ccSHideki Saito 
893f2ec16ccSHideki Saito bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
894f2ec16ccSHideki Saito   auto *Inst = dyn_cast<Instruction>(V);
895f2ec16ccSHideki Saito   return (Inst && InductionCastsToIgnore.count(Inst));
896f2ec16ccSHideki Saito }
897f2ec16ccSHideki Saito 
898f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
899f2ec16ccSHideki Saito   return isInductionPhi(V) || isCastedInductionVariable(V);
900f2ec16ccSHideki Saito }
901f2ec16ccSHideki Saito 
902f2ec16ccSHideki Saito bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
903f2ec16ccSHideki Saito   return FirstOrderRecurrences.count(Phi);
904f2ec16ccSHideki Saito }
905f2ec16ccSHideki Saito 
906f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
907f2ec16ccSHideki Saito   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
908f2ec16ccSHideki Saito }
909f2ec16ccSHideki Saito 
910f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockCanBePredicated(
911f2ec16ccSHideki Saito     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
912f2ec16ccSHideki Saito   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
913f2ec16ccSHideki Saito 
914f2ec16ccSHideki Saito   for (Instruction &I : *BB) {
915f2ec16ccSHideki Saito     // Check that we don't have a constant expression that can trap as operand.
916f2ec16ccSHideki Saito     for (Value *Operand : I.operands()) {
917f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(Operand))
918f2ec16ccSHideki Saito         if (C->canTrap())
919f2ec16ccSHideki Saito           return false;
920f2ec16ccSHideki Saito     }
921f2ec16ccSHideki Saito     // We might be able to hoist the load.
922f2ec16ccSHideki Saito     if (I.mayReadFromMemory()) {
923f2ec16ccSHideki Saito       auto *LI = dyn_cast<LoadInst>(&I);
924f2ec16ccSHideki Saito       if (!LI)
925f2ec16ccSHideki Saito         return false;
926f2ec16ccSHideki Saito       if (!SafePtrs.count(LI->getPointerOperand())) {
927f2ec16ccSHideki Saito         // !llvm.mem.parallel_loop_access implies if-conversion safety.
928f2ec16ccSHideki Saito         // Otherwise, record that the load needs (real or emulated) masking
929f2ec16ccSHideki Saito         // and let the cost model decide.
930f2ec16ccSHideki Saito         if (!IsAnnotatedParallel)
931f2ec16ccSHideki Saito           MaskedOp.insert(LI);
932f2ec16ccSHideki Saito         continue;
933f2ec16ccSHideki Saito       }
934f2ec16ccSHideki Saito     }
935f2ec16ccSHideki Saito 
936f2ec16ccSHideki Saito     if (I.mayWriteToMemory()) {
937f2ec16ccSHideki Saito       auto *SI = dyn_cast<StoreInst>(&I);
938f2ec16ccSHideki Saito       if (!SI)
939f2ec16ccSHideki Saito         return false;
940f2ec16ccSHideki Saito       // Predicated store requires some form of masking:
941f2ec16ccSHideki Saito       // 1) masked store HW instruction,
942f2ec16ccSHideki Saito       // 2) emulation via load-blend-store (only if safe and legal to do so,
943f2ec16ccSHideki Saito       //    be aware on the race conditions), or
944f2ec16ccSHideki Saito       // 3) element-by-element predicate check and scalar store.
945f2ec16ccSHideki Saito       MaskedOp.insert(SI);
946f2ec16ccSHideki Saito       continue;
947f2ec16ccSHideki Saito     }
948f2ec16ccSHideki Saito     if (I.mayThrow())
949f2ec16ccSHideki Saito       return false;
950f2ec16ccSHideki Saito   }
951f2ec16ccSHideki Saito 
952f2ec16ccSHideki Saito   return true;
953f2ec16ccSHideki Saito }
954f2ec16ccSHideki Saito 
955f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
956f2ec16ccSHideki Saito   if (!EnableIfConversion) {
9579e97caf5SRenato Golin     reportVectorizationFailure("If-conversion is disabled",
9589e97caf5SRenato Golin                                "if-conversion is disabled",
9599e97caf5SRenato Golin                                "IfConversionDisabled");
960f2ec16ccSHideki Saito     return false;
961f2ec16ccSHideki Saito   }
962f2ec16ccSHideki Saito 
963f2ec16ccSHideki Saito   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
964f2ec16ccSHideki Saito 
965f2ec16ccSHideki Saito   // A list of pointers that we can safely read and write to.
966f2ec16ccSHideki Saito   SmallPtrSet<Value *, 8> SafePointes;
967f2ec16ccSHideki Saito 
968f2ec16ccSHideki Saito   // Collect safe addresses.
969f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
970f2ec16ccSHideki Saito     if (blockNeedsPredication(BB))
971f2ec16ccSHideki Saito       continue;
972f2ec16ccSHideki Saito 
973f2ec16ccSHideki Saito     for (Instruction &I : *BB)
974f2ec16ccSHideki Saito       if (auto *Ptr = getLoadStorePointerOperand(&I))
975f2ec16ccSHideki Saito         SafePointes.insert(Ptr);
976f2ec16ccSHideki Saito   }
977f2ec16ccSHideki Saito 
978f2ec16ccSHideki Saito   // Collect the blocks that need predication.
979f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
980f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
981f2ec16ccSHideki Saito     // We don't support switch statements inside loops.
982f2ec16ccSHideki Saito     if (!isa<BranchInst>(BB->getTerminator())) {
9839e97caf5SRenato Golin       reportVectorizationFailure("Loop contains a switch statement",
9849e97caf5SRenato Golin                                  "loop contains a switch statement",
9859e97caf5SRenato Golin                                  "LoopContainsSwitch", BB->getTerminator());
986f2ec16ccSHideki Saito       return false;
987f2ec16ccSHideki Saito     }
988f2ec16ccSHideki Saito 
989f2ec16ccSHideki Saito     // We must be able to predicate all blocks that need to be predicated.
990f2ec16ccSHideki Saito     if (blockNeedsPredication(BB)) {
991f2ec16ccSHideki Saito       if (!blockCanBePredicated(BB, SafePointes)) {
9929e97caf5SRenato Golin         reportVectorizationFailure(
9939e97caf5SRenato Golin             "Control flow cannot be substituted for a select",
9949e97caf5SRenato Golin             "control flow cannot be substituted for a select",
9959e97caf5SRenato Golin             "NoCFGForSelect", BB->getTerminator());
996f2ec16ccSHideki Saito         return false;
997f2ec16ccSHideki Saito       }
998f2ec16ccSHideki Saito     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
9999e97caf5SRenato Golin       reportVectorizationFailure(
10009e97caf5SRenato Golin           "Control flow cannot be substituted for a select",
10019e97caf5SRenato Golin           "control flow cannot be substituted for a select",
10029e97caf5SRenato Golin           "NoCFGForSelect", BB->getTerminator());
1003f2ec16ccSHideki Saito       return false;
1004f2ec16ccSHideki Saito     }
1005f2ec16ccSHideki Saito   }
1006f2ec16ccSHideki Saito 
1007f2ec16ccSHideki Saito   // We can if-convert this loop.
1008f2ec16ccSHideki Saito   return true;
1009f2ec16ccSHideki Saito }
1010f2ec16ccSHideki Saito 
1011f2ec16ccSHideki Saito // Helper function to canVectorizeLoopNestCFG.
1012f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
1013f2ec16ccSHideki Saito                                                     bool UseVPlanNativePath) {
1014f2ec16ccSHideki Saito   assert((UseVPlanNativePath || Lp->empty()) &&
1015f2ec16ccSHideki Saito          "VPlan-native path is not enabled.");
1016f2ec16ccSHideki Saito 
1017f2ec16ccSHideki Saito   // TODO: ORE should be improved to show more accurate information when an
1018f2ec16ccSHideki Saito   // outer loop can't be vectorized because a nested loop is not understood or
1019f2ec16ccSHideki Saito   // legal. Something like: "outer_loop_location: loop not vectorized:
1020f2ec16ccSHideki Saito   // (inner_loop_location) loop control flow is not understood by vectorizer".
1021f2ec16ccSHideki Saito 
1022f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1023f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1024f2ec16ccSHideki Saito   bool Result = true;
1025f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1026f2ec16ccSHideki Saito 
1027f2ec16ccSHideki Saito   // We must have a loop in canonical form. Loops with indirectbr in them cannot
1028f2ec16ccSHideki Saito   // be canonicalized.
1029f2ec16ccSHideki Saito   if (!Lp->getLoopPreheader()) {
10309e97caf5SRenato Golin     reportVectorizationFailure("Loop doesn't have a legal pre-header",
10319e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
10329e97caf5SRenato Golin         "CFGNotUnderstood");
1033f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1034f2ec16ccSHideki Saito       Result = false;
1035f2ec16ccSHideki Saito     else
1036f2ec16ccSHideki Saito       return false;
1037f2ec16ccSHideki Saito   }
1038f2ec16ccSHideki Saito 
1039f2ec16ccSHideki Saito   // We must have a single backedge.
1040f2ec16ccSHideki Saito   if (Lp->getNumBackEdges() != 1) {
10419e97caf5SRenato Golin     reportVectorizationFailure("The loop must have a single backedge",
10429e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
10439e97caf5SRenato Golin         "CFGNotUnderstood");
1044f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1045f2ec16ccSHideki Saito       Result = false;
1046f2ec16ccSHideki Saito     else
1047f2ec16ccSHideki Saito       return false;
1048f2ec16ccSHideki Saito   }
1049f2ec16ccSHideki Saito 
1050f2ec16ccSHideki Saito   // We must have a single exiting block.
1051f2ec16ccSHideki Saito   if (!Lp->getExitingBlock()) {
10529e97caf5SRenato Golin     reportVectorizationFailure("The loop must have an exiting block",
10539e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
10549e97caf5SRenato Golin         "CFGNotUnderstood");
1055f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1056f2ec16ccSHideki Saito       Result = false;
1057f2ec16ccSHideki Saito     else
1058f2ec16ccSHideki Saito       return false;
1059f2ec16ccSHideki Saito   }
1060f2ec16ccSHideki Saito 
1061f2ec16ccSHideki Saito   // We only handle bottom-tested loops, i.e. loop in which the condition is
1062f2ec16ccSHideki Saito   // checked at the end of each iteration. With that we can assume that all
1063f2ec16ccSHideki Saito   // instructions in the loop are executed the same number of times.
1064f2ec16ccSHideki Saito   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
10659e97caf5SRenato Golin     reportVectorizationFailure("The exiting block is not the loop latch",
10669e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
10679e97caf5SRenato Golin         "CFGNotUnderstood");
1068f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1069f2ec16ccSHideki Saito       Result = false;
1070f2ec16ccSHideki Saito     else
1071f2ec16ccSHideki Saito       return false;
1072f2ec16ccSHideki Saito   }
1073f2ec16ccSHideki Saito 
1074f2ec16ccSHideki Saito   return Result;
1075f2ec16ccSHideki Saito }
1076f2ec16ccSHideki Saito 
1077f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1078f2ec16ccSHideki Saito     Loop *Lp, bool UseVPlanNativePath) {
1079f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1080f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1081f2ec16ccSHideki Saito   bool Result = true;
1082f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1083f2ec16ccSHideki Saito   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1084f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1085f2ec16ccSHideki Saito       Result = false;
1086f2ec16ccSHideki Saito     else
1087f2ec16ccSHideki Saito       return false;
1088f2ec16ccSHideki Saito   }
1089f2ec16ccSHideki Saito 
1090f2ec16ccSHideki Saito   // Recursively check whether the loop control flow of nested loops is
1091f2ec16ccSHideki Saito   // understood.
1092f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
1093f2ec16ccSHideki Saito     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1094f2ec16ccSHideki Saito       if (DoExtraAnalysis)
1095f2ec16ccSHideki Saito         Result = false;
1096f2ec16ccSHideki Saito       else
1097f2ec16ccSHideki Saito         return false;
1098f2ec16ccSHideki Saito     }
1099f2ec16ccSHideki Saito 
1100f2ec16ccSHideki Saito   return Result;
1101f2ec16ccSHideki Saito }
1102f2ec16ccSHideki Saito 
1103f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1104f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1105f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1106f2ec16ccSHideki Saito   bool Result = true;
1107f2ec16ccSHideki Saito 
1108f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1109f2ec16ccSHideki Saito   // Check whether the loop-related control flow in the loop nest is expected by
1110f2ec16ccSHideki Saito   // vectorizer.
1111f2ec16ccSHideki Saito   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1112f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1113f2ec16ccSHideki Saito       Result = false;
1114f2ec16ccSHideki Saito     else
1115f2ec16ccSHideki Saito       return false;
1116f2ec16ccSHideki Saito   }
1117f2ec16ccSHideki Saito 
1118f2ec16ccSHideki Saito   // We need to have a loop header.
1119d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1120f2ec16ccSHideki Saito                     << '\n');
1121f2ec16ccSHideki Saito 
1122f2ec16ccSHideki Saito   // Specific checks for outer loops. We skip the remaining legal checks at this
1123f2ec16ccSHideki Saito   // point because they don't support outer loops.
1124f2ec16ccSHideki Saito   if (!TheLoop->empty()) {
1125f2ec16ccSHideki Saito     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1126f2ec16ccSHideki Saito 
1127f2ec16ccSHideki Saito     if (!canVectorizeOuterLoop()) {
11289e97caf5SRenato Golin       reportVectorizationFailure("Unsupported outer loop",
11299e97caf5SRenato Golin                                  "unsupported outer loop",
11309e97caf5SRenato Golin                                  "UnsupportedOuterLoop");
1131f2ec16ccSHideki Saito       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1132f2ec16ccSHideki Saito       // outer loops.
1133f2ec16ccSHideki Saito       return false;
1134f2ec16ccSHideki Saito     }
1135f2ec16ccSHideki Saito 
1136d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1137f2ec16ccSHideki Saito     return Result;
1138f2ec16ccSHideki Saito   }
1139f2ec16ccSHideki Saito 
1140f2ec16ccSHideki Saito   assert(TheLoop->empty() && "Inner loop expected.");
1141f2ec16ccSHideki Saito   // Check if we can if-convert non-single-bb loops.
1142f2ec16ccSHideki Saito   unsigned NumBlocks = TheLoop->getNumBlocks();
1143f2ec16ccSHideki Saito   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1144d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1145f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1146f2ec16ccSHideki Saito       Result = false;
1147f2ec16ccSHideki Saito     else
1148f2ec16ccSHideki Saito       return false;
1149f2ec16ccSHideki Saito   }
1150f2ec16ccSHideki Saito 
1151f2ec16ccSHideki Saito   // Check if we can vectorize the instructions and CFG in this loop.
1152f2ec16ccSHideki Saito   if (!canVectorizeInstrs()) {
1153d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1154f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1155f2ec16ccSHideki Saito       Result = false;
1156f2ec16ccSHideki Saito     else
1157f2ec16ccSHideki Saito       return false;
1158f2ec16ccSHideki Saito   }
1159f2ec16ccSHideki Saito 
1160f2ec16ccSHideki Saito   // Go over each instruction and look at memory deps.
1161f2ec16ccSHideki Saito   if (!canVectorizeMemory()) {
1162d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1163f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1164f2ec16ccSHideki Saito       Result = false;
1165f2ec16ccSHideki Saito     else
1166f2ec16ccSHideki Saito       return false;
1167f2ec16ccSHideki Saito   }
1168f2ec16ccSHideki Saito 
1169d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1170f2ec16ccSHideki Saito                     << (LAI->getRuntimePointerChecking()->Need
1171f2ec16ccSHideki Saito                             ? " (with a runtime bound check)"
1172f2ec16ccSHideki Saito                             : "")
1173f2ec16ccSHideki Saito                     << "!\n");
1174f2ec16ccSHideki Saito 
1175f2ec16ccSHideki Saito   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1176f2ec16ccSHideki Saito   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1177f2ec16ccSHideki Saito     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1178f2ec16ccSHideki Saito 
1179f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
11809e97caf5SRenato Golin     reportVectorizationFailure("Too many SCEV checks needed",
11819e97caf5SRenato Golin         "Too many SCEV assumptions need to be made and checked at runtime",
11829e97caf5SRenato Golin         "TooManySCEVRunTimeChecks");
1183f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1184f2ec16ccSHideki Saito       Result = false;
1185f2ec16ccSHideki Saito     else
1186f2ec16ccSHideki Saito       return false;
1187f2ec16ccSHideki Saito   }
1188f2ec16ccSHideki Saito 
1189f2ec16ccSHideki Saito   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1190f2ec16ccSHideki Saito   // we can vectorize, and at this point we don't have any other mem analysis
1191f2ec16ccSHideki Saito   // which may limit our maximum vectorization factor, so just return true with
1192f2ec16ccSHideki Saito   // no restrictions.
1193f2ec16ccSHideki Saito   return Result;
1194f2ec16ccSHideki Saito }
1195f2ec16ccSHideki Saito 
1196b0b5312eSAyal Zaks bool LoopVectorizationLegality::canFoldTailByMasking() {
1197b0b5312eSAyal Zaks 
1198b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1199b0b5312eSAyal Zaks 
1200b0b5312eSAyal Zaks   if (!PrimaryInduction) {
12019e97caf5SRenato Golin     reportVectorizationFailure(
12029e97caf5SRenato Golin         "No primary induction, cannot fold tail by masking",
12039e97caf5SRenato Golin         "Missing a primary induction variable in the loop, which is "
12049e97caf5SRenato Golin         "needed in order to fold tail by masking as required.",
12059e97caf5SRenato Golin         "NoPrimaryInduction");
1206b0b5312eSAyal Zaks     return false;
1207b0b5312eSAyal Zaks   }
1208b0b5312eSAyal Zaks 
1209b0b5312eSAyal Zaks   // TODO: handle reductions when tail is folded by masking.
1210b0b5312eSAyal Zaks   if (!Reductions.empty()) {
12119e97caf5SRenato Golin     reportVectorizationFailure(
12129e97caf5SRenato Golin         "Loop has reductions, cannot fold tail by masking",
12139e97caf5SRenato Golin         "Cannot fold tail by masking in the presence of reductions.",
12149e97caf5SRenato Golin         "ReductionFoldingTailByMasking");
1215b0b5312eSAyal Zaks     return false;
1216b0b5312eSAyal Zaks   }
1217b0b5312eSAyal Zaks 
1218b0b5312eSAyal Zaks   // TODO: handle outside users when tail is folded by masking.
1219b0b5312eSAyal Zaks   for (auto *AE : AllowedExit) {
1220b0b5312eSAyal Zaks     // Check that all users of allowed exit values are inside the loop.
1221b0b5312eSAyal Zaks     for (User *U : AE->users()) {
1222b0b5312eSAyal Zaks       Instruction *UI = cast<Instruction>(U);
1223b0b5312eSAyal Zaks       if (TheLoop->contains(UI))
1224b0b5312eSAyal Zaks         continue;
12259e97caf5SRenato Golin       reportVectorizationFailure(
12269e97caf5SRenato Golin           "Cannot fold tail by masking, loop has an outside user for",
12279e97caf5SRenato Golin           "Cannot fold tail by masking in the presence of live outs.",
12289e97caf5SRenato Golin           "LiveOutFoldingTailByMasking", UI);
1229b0b5312eSAyal Zaks       return false;
1230b0b5312eSAyal Zaks     }
1231b0b5312eSAyal Zaks   }
1232b0b5312eSAyal Zaks 
1233b0b5312eSAyal Zaks   // The list of pointers that we can safely read and write to remains empty.
1234b0b5312eSAyal Zaks   SmallPtrSet<Value *, 8> SafePointers;
1235b0b5312eSAyal Zaks 
1236b0b5312eSAyal Zaks   // Check and mark all blocks for predication, including those that ordinarily
1237b0b5312eSAyal Zaks   // do not need predication such as the header block.
1238b0b5312eSAyal Zaks   for (BasicBlock *BB : TheLoop->blocks()) {
1239b0b5312eSAyal Zaks     if (!blockCanBePredicated(BB, SafePointers)) {
12409e97caf5SRenato Golin       reportVectorizationFailure(
12419e97caf5SRenato Golin           "Cannot fold tail by masking as required",
12429e97caf5SRenato Golin           "control flow cannot be substituted for a select",
12439e97caf5SRenato Golin           "NoCFGForSelect", BB->getTerminator());
1244b0b5312eSAyal Zaks       return false;
1245b0b5312eSAyal Zaks     }
1246b0b5312eSAyal Zaks   }
1247b0b5312eSAyal Zaks 
1248b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1249b0b5312eSAyal Zaks   return true;
1250b0b5312eSAyal Zaks }
1251b0b5312eSAyal Zaks 
1252f2ec16ccSHideki Saito } // namespace llvm
1253