1f2ec16ccSHideki Saito //===- LoopVectorizationLegality.cpp --------------------------------------===//
2f2ec16ccSHideki Saito //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6f2ec16ccSHideki Saito //
7f2ec16ccSHideki Saito //===----------------------------------------------------------------------===//
8f2ec16ccSHideki Saito //
9f2ec16ccSHideki Saito // This file provides loop vectorization legality analysis. Original code
10f2ec16ccSHideki Saito // resided in LoopVectorize.cpp for a long time.
11f2ec16ccSHideki Saito //
12f2ec16ccSHideki Saito // At this point, it is implemented as a utility class, not as an analysis
13f2ec16ccSHideki Saito // pass. It should be easy to create an analysis pass around it if there
14f2ec16ccSHideki Saito // is a need (but D45420 needs to happen first).
15f2ec16ccSHideki Saito //
16f2ec16ccSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
17f2ec16ccSHideki Saito #include "llvm/Analysis/VectorUtils.h"
18f2ec16ccSHideki Saito #include "llvm/IR/IntrinsicInst.h"
19f2ec16ccSHideki Saito 
20f2ec16ccSHideki Saito using namespace llvm;
21f2ec16ccSHideki Saito 
22f2ec16ccSHideki Saito #define LV_NAME "loop-vectorize"
23f2ec16ccSHideki Saito #define DEBUG_TYPE LV_NAME
24f2ec16ccSHideki Saito 
254e4ecae0SHideki Saito extern cl::opt<bool> EnableVPlanPredication;
264e4ecae0SHideki Saito 
27f2ec16ccSHideki Saito static cl::opt<bool>
28f2ec16ccSHideki Saito     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
29f2ec16ccSHideki Saito                        cl::desc("Enable if-conversion during vectorization."));
30f2ec16ccSHideki Saito 
31f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold(
32f2ec16ccSHideki Saito     "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden,
33f2ec16ccSHideki Saito     cl::desc("The maximum allowed number of runtime memory checks with a "
34f2ec16ccSHideki Saito              "vectorize(enable) pragma."));
35f2ec16ccSHideki Saito 
36f2ec16ccSHideki Saito static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
37f2ec16ccSHideki Saito     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
38f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed."));
39f2ec16ccSHideki Saito 
40f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
41f2ec16ccSHideki Saito     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
42f2ec16ccSHideki Saito     cl::desc("The maximum number of SCEV checks allowed with a "
43f2ec16ccSHideki Saito              "vectorize(enable) pragma"));
44f2ec16ccSHideki Saito 
45f2ec16ccSHideki Saito /// Maximum vectorization interleave count.
46f2ec16ccSHideki Saito static const unsigned MaxInterleaveFactor = 16;
47f2ec16ccSHideki Saito 
48f2ec16ccSHideki Saito namespace llvm {
49f2ec16ccSHideki Saito 
50*19189993SFangrui Song #ifndef NDEBUG
519e97caf5SRenato Golin static void debugVectorizationFailure(const StringRef DebugMsg,
529e97caf5SRenato Golin     Instruction *I) {
539e97caf5SRenato Golin   dbgs() << "LV: Not vectorizing: " << DebugMsg;
549e97caf5SRenato Golin   if (I != nullptr)
559e97caf5SRenato Golin     dbgs() << " " << *I;
569e97caf5SRenato Golin   else
579e97caf5SRenato Golin     dbgs() << '.';
589e97caf5SRenato Golin   dbgs() << '\n';
599e97caf5SRenato Golin }
60*19189993SFangrui Song #endif
619e97caf5SRenato Golin 
62f2ec16ccSHideki Saito OptimizationRemarkAnalysis createLVMissedAnalysis(const char *PassName,
63f2ec16ccSHideki Saito                                                   StringRef RemarkName,
64f2ec16ccSHideki Saito                                                   Loop *TheLoop,
65f2ec16ccSHideki Saito                                                   Instruction *I) {
66f2ec16ccSHideki Saito   Value *CodeRegion = TheLoop->getHeader();
67f2ec16ccSHideki Saito   DebugLoc DL = TheLoop->getStartLoc();
68f2ec16ccSHideki Saito 
69f2ec16ccSHideki Saito   if (I) {
70f2ec16ccSHideki Saito     CodeRegion = I->getParent();
71f2ec16ccSHideki Saito     // If there is no debug location attached to the instruction, revert back to
72f2ec16ccSHideki Saito     // using the loop's.
73f2ec16ccSHideki Saito     if (I->getDebugLoc())
74f2ec16ccSHideki Saito       DL = I->getDebugLoc();
75f2ec16ccSHideki Saito   }
76f2ec16ccSHideki Saito 
77f2ec16ccSHideki Saito   OptimizationRemarkAnalysis R(PassName, RemarkName, DL, CodeRegion);
78f2ec16ccSHideki Saito   R << "loop not vectorized: ";
79f2ec16ccSHideki Saito   return R;
80f2ec16ccSHideki Saito }
81f2ec16ccSHideki Saito 
82f2ec16ccSHideki Saito bool LoopVectorizeHints::Hint::validate(unsigned Val) {
83f2ec16ccSHideki Saito   switch (Kind) {
84f2ec16ccSHideki Saito   case HK_WIDTH:
85f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
86f2ec16ccSHideki Saito   case HK_UNROLL:
87f2ec16ccSHideki Saito     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
88f2ec16ccSHideki Saito   case HK_FORCE:
89f2ec16ccSHideki Saito     return (Val <= 1);
90f2ec16ccSHideki Saito   case HK_ISVECTORIZED:
91f2ec16ccSHideki Saito     return (Val == 0 || Val == 1);
92f2ec16ccSHideki Saito   }
93f2ec16ccSHideki Saito   return false;
94f2ec16ccSHideki Saito }
95f2ec16ccSHideki Saito 
96d4eb13c8SMichael Kruse LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
97d4eb13c8SMichael Kruse                                        bool InterleaveOnlyWhenForced,
98f2ec16ccSHideki Saito                                        OptimizationRemarkEmitter &ORE)
99f2ec16ccSHideki Saito     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
100d4eb13c8SMichael Kruse       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL),
101f2ec16ccSHideki Saito       Force("vectorize.enable", FK_Undefined, HK_FORCE),
102f2ec16ccSHideki Saito       IsVectorized("isvectorized", 0, HK_ISVECTORIZED), TheLoop(L), ORE(ORE) {
103f2ec16ccSHideki Saito   // Populate values with existing loop metadata.
104f2ec16ccSHideki Saito   getHintsFromMetadata();
105f2ec16ccSHideki Saito 
106f2ec16ccSHideki Saito   // force-vector-interleave overrides DisableInterleaving.
107f2ec16ccSHideki Saito   if (VectorizerParams::isInterleaveForced())
108f2ec16ccSHideki Saito     Interleave.Value = VectorizerParams::VectorizationInterleave;
109f2ec16ccSHideki Saito 
110f2ec16ccSHideki Saito   if (IsVectorized.Value != 1)
111f2ec16ccSHideki Saito     // If the vectorization width and interleaving count are both 1 then
112f2ec16ccSHideki Saito     // consider the loop to have been already vectorized because there's
113f2ec16ccSHideki Saito     // nothing more that we can do.
114f2ec16ccSHideki Saito     IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1;
115d4eb13c8SMichael Kruse   LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs()
116f2ec16ccSHideki Saito              << "LV: Interleaving disabled by the pass manager\n");
117f2ec16ccSHideki Saito }
118f2ec16ccSHideki Saito 
11977a614a6SMichael Kruse void LoopVectorizeHints::setAlreadyVectorized() {
12077a614a6SMichael Kruse   LLVMContext &Context = TheLoop->getHeader()->getContext();
12177a614a6SMichael Kruse 
12277a614a6SMichael Kruse   MDNode *IsVectorizedMD = MDNode::get(
12377a614a6SMichael Kruse       Context,
12477a614a6SMichael Kruse       {MDString::get(Context, "llvm.loop.isvectorized"),
12577a614a6SMichael Kruse        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
12677a614a6SMichael Kruse   MDNode *LoopID = TheLoop->getLoopID();
12777a614a6SMichael Kruse   MDNode *NewLoopID =
12877a614a6SMichael Kruse       makePostTransformationMetadata(Context, LoopID,
12977a614a6SMichael Kruse                                      {Twine(Prefix(), "vectorize.").str(),
13077a614a6SMichael Kruse                                       Twine(Prefix(), "interleave.").str()},
13177a614a6SMichael Kruse                                      {IsVectorizedMD});
13277a614a6SMichael Kruse   TheLoop->setLoopID(NewLoopID);
13377a614a6SMichael Kruse 
13477a614a6SMichael Kruse   // Update internal cache.
13577a614a6SMichael Kruse   IsVectorized.Value = 1;
13677a614a6SMichael Kruse }
13777a614a6SMichael Kruse 
138d4eb13c8SMichael Kruse bool LoopVectorizeHints::allowVectorization(
139d4eb13c8SMichael Kruse     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
140f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled) {
141d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
142f2ec16ccSHideki Saito     emitRemarkWithHints();
143f2ec16ccSHideki Saito     return false;
144f2ec16ccSHideki Saito   }
145f2ec16ccSHideki Saito 
146d4eb13c8SMichael Kruse   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
147d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
148f2ec16ccSHideki Saito     emitRemarkWithHints();
149f2ec16ccSHideki Saito     return false;
150f2ec16ccSHideki Saito   }
151f2ec16ccSHideki Saito 
152f2ec16ccSHideki Saito   if (getIsVectorized() == 1) {
153d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
154f2ec16ccSHideki Saito     // FIXME: Add interleave.disable metadata. This will allow
155f2ec16ccSHideki Saito     // vectorize.disable to be used without disabling the pass and errors
156f2ec16ccSHideki Saito     // to differentiate between disabled vectorization and a width of 1.
157f2ec16ccSHideki Saito     ORE.emit([&]() {
158f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
159f2ec16ccSHideki Saito                                         "AllDisabled", L->getStartLoc(),
160f2ec16ccSHideki Saito                                         L->getHeader())
161f2ec16ccSHideki Saito              << "loop not vectorized: vectorization and interleaving are "
162f2ec16ccSHideki Saito                 "explicitly disabled, or the loop has already been "
163f2ec16ccSHideki Saito                 "vectorized";
164f2ec16ccSHideki Saito     });
165f2ec16ccSHideki Saito     return false;
166f2ec16ccSHideki Saito   }
167f2ec16ccSHideki Saito 
168f2ec16ccSHideki Saito   return true;
169f2ec16ccSHideki Saito }
170f2ec16ccSHideki Saito 
171f2ec16ccSHideki Saito void LoopVectorizeHints::emitRemarkWithHints() const {
172f2ec16ccSHideki Saito   using namespace ore;
173f2ec16ccSHideki Saito 
174f2ec16ccSHideki Saito   ORE.emit([&]() {
175f2ec16ccSHideki Saito     if (Force.Value == LoopVectorizeHints::FK_Disabled)
176f2ec16ccSHideki Saito       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
177f2ec16ccSHideki Saito                                       TheLoop->getStartLoc(),
178f2ec16ccSHideki Saito                                       TheLoop->getHeader())
179f2ec16ccSHideki Saito              << "loop not vectorized: vectorization is explicitly disabled";
180f2ec16ccSHideki Saito     else {
181f2ec16ccSHideki Saito       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
182f2ec16ccSHideki Saito                                  TheLoop->getStartLoc(), TheLoop->getHeader());
183f2ec16ccSHideki Saito       R << "loop not vectorized";
184f2ec16ccSHideki Saito       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
185f2ec16ccSHideki Saito         R << " (Force=" << NV("Force", true);
186f2ec16ccSHideki Saito         if (Width.Value != 0)
187f2ec16ccSHideki Saito           R << ", Vector Width=" << NV("VectorWidth", Width.Value);
188f2ec16ccSHideki Saito         if (Interleave.Value != 0)
189f2ec16ccSHideki Saito           R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value);
190f2ec16ccSHideki Saito         R << ")";
191f2ec16ccSHideki Saito       }
192f2ec16ccSHideki Saito       return R;
193f2ec16ccSHideki Saito     }
194f2ec16ccSHideki Saito   });
195f2ec16ccSHideki Saito }
196f2ec16ccSHideki Saito 
197f2ec16ccSHideki Saito const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
198f2ec16ccSHideki Saito   if (getWidth() == 1)
199f2ec16ccSHideki Saito     return LV_NAME;
200f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Disabled)
201f2ec16ccSHideki Saito     return LV_NAME;
202f2ec16ccSHideki Saito   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0)
203f2ec16ccSHideki Saito     return LV_NAME;
204f2ec16ccSHideki Saito   return OptimizationRemarkAnalysis::AlwaysPrint;
205f2ec16ccSHideki Saito }
206f2ec16ccSHideki Saito 
207f2ec16ccSHideki Saito void LoopVectorizeHints::getHintsFromMetadata() {
208f2ec16ccSHideki Saito   MDNode *LoopID = TheLoop->getLoopID();
209f2ec16ccSHideki Saito   if (!LoopID)
210f2ec16ccSHideki Saito     return;
211f2ec16ccSHideki Saito 
212f2ec16ccSHideki Saito   // First operand should refer to the loop id itself.
213f2ec16ccSHideki Saito   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
214f2ec16ccSHideki Saito   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
215f2ec16ccSHideki Saito 
216f2ec16ccSHideki Saito   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
217f2ec16ccSHideki Saito     const MDString *S = nullptr;
218f2ec16ccSHideki Saito     SmallVector<Metadata *, 4> Args;
219f2ec16ccSHideki Saito 
220f2ec16ccSHideki Saito     // The expected hint is either a MDString or a MDNode with the first
221f2ec16ccSHideki Saito     // operand a MDString.
222f2ec16ccSHideki Saito     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
223f2ec16ccSHideki Saito       if (!MD || MD->getNumOperands() == 0)
224f2ec16ccSHideki Saito         continue;
225f2ec16ccSHideki Saito       S = dyn_cast<MDString>(MD->getOperand(0));
226f2ec16ccSHideki Saito       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
227f2ec16ccSHideki Saito         Args.push_back(MD->getOperand(i));
228f2ec16ccSHideki Saito     } else {
229f2ec16ccSHideki Saito       S = dyn_cast<MDString>(LoopID->getOperand(i));
230f2ec16ccSHideki Saito       assert(Args.size() == 0 && "too many arguments for MDString");
231f2ec16ccSHideki Saito     }
232f2ec16ccSHideki Saito 
233f2ec16ccSHideki Saito     if (!S)
234f2ec16ccSHideki Saito       continue;
235f2ec16ccSHideki Saito 
236f2ec16ccSHideki Saito     // Check if the hint starts with the loop metadata prefix.
237f2ec16ccSHideki Saito     StringRef Name = S->getString();
238f2ec16ccSHideki Saito     if (Args.size() == 1)
239f2ec16ccSHideki Saito       setHint(Name, Args[0]);
240f2ec16ccSHideki Saito   }
241f2ec16ccSHideki Saito }
242f2ec16ccSHideki Saito 
243f2ec16ccSHideki Saito void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
244f2ec16ccSHideki Saito   if (!Name.startswith(Prefix()))
245f2ec16ccSHideki Saito     return;
246f2ec16ccSHideki Saito   Name = Name.substr(Prefix().size(), StringRef::npos);
247f2ec16ccSHideki Saito 
248f2ec16ccSHideki Saito   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
249f2ec16ccSHideki Saito   if (!C)
250f2ec16ccSHideki Saito     return;
251f2ec16ccSHideki Saito   unsigned Val = C->getZExtValue();
252f2ec16ccSHideki Saito 
253f2ec16ccSHideki Saito   Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized};
254f2ec16ccSHideki Saito   for (auto H : Hints) {
255f2ec16ccSHideki Saito     if (Name == H->Name) {
256f2ec16ccSHideki Saito       if (H->validate(Val))
257f2ec16ccSHideki Saito         H->Value = Val;
258f2ec16ccSHideki Saito       else
259d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
260f2ec16ccSHideki Saito       break;
261f2ec16ccSHideki Saito     }
262f2ec16ccSHideki Saito   }
263f2ec16ccSHideki Saito }
264f2ec16ccSHideki Saito 
265f2ec16ccSHideki Saito bool LoopVectorizationRequirements::doesNotMeet(
266f2ec16ccSHideki Saito     Function *F, Loop *L, const LoopVectorizeHints &Hints) {
267f2ec16ccSHideki Saito   const char *PassName = Hints.vectorizeAnalysisPassName();
268f2ec16ccSHideki Saito   bool Failed = false;
269f2ec16ccSHideki Saito   if (UnsafeAlgebraInst && !Hints.allowReordering()) {
270f2ec16ccSHideki Saito     ORE.emit([&]() {
271f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisFPCommute(
272f2ec16ccSHideki Saito                  PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(),
273f2ec16ccSHideki Saito                  UnsafeAlgebraInst->getParent())
274f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
275f2ec16ccSHideki Saito                 "floating-point operations";
276f2ec16ccSHideki Saito     });
277f2ec16ccSHideki Saito     Failed = true;
278f2ec16ccSHideki Saito   }
279f2ec16ccSHideki Saito 
280f2ec16ccSHideki Saito   // Test if runtime memcheck thresholds are exceeded.
281f2ec16ccSHideki Saito   bool PragmaThresholdReached =
282f2ec16ccSHideki Saito       NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold;
283f2ec16ccSHideki Saito   bool ThresholdReached =
284f2ec16ccSHideki Saito       NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold;
285f2ec16ccSHideki Saito   if ((ThresholdReached && !Hints.allowReordering()) ||
286f2ec16ccSHideki Saito       PragmaThresholdReached) {
287f2ec16ccSHideki Saito     ORE.emit([&]() {
288f2ec16ccSHideki Saito       return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps",
289f2ec16ccSHideki Saito                                                 L->getStartLoc(),
290f2ec16ccSHideki Saito                                                 L->getHeader())
291f2ec16ccSHideki Saito              << "loop not vectorized: cannot prove it is safe to reorder "
292f2ec16ccSHideki Saito                 "memory operations";
293f2ec16ccSHideki Saito     });
294d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n");
295f2ec16ccSHideki Saito     Failed = true;
296f2ec16ccSHideki Saito   }
297f2ec16ccSHideki Saito 
298f2ec16ccSHideki Saito   return Failed;
299f2ec16ccSHideki Saito }
300f2ec16ccSHideki Saito 
301f2ec16ccSHideki Saito // Return true if the inner loop \p Lp is uniform with regard to the outer loop
302f2ec16ccSHideki Saito // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
303f2ec16ccSHideki Saito // executing the inner loop will execute the same iterations). This check is
304f2ec16ccSHideki Saito // very constrained for now but it will be relaxed in the future. \p Lp is
305f2ec16ccSHideki Saito // considered uniform if it meets all the following conditions:
306f2ec16ccSHideki Saito //   1) it has a canonical IV (starting from 0 and with stride 1),
307f2ec16ccSHideki Saito //   2) its latch terminator is a conditional branch and,
308f2ec16ccSHideki Saito //   3) its latch condition is a compare instruction whose operands are the
309f2ec16ccSHideki Saito //      canonical IV and an OuterLp invariant.
310f2ec16ccSHideki Saito // This check doesn't take into account the uniformity of other conditions not
311f2ec16ccSHideki Saito // related to the loop latch because they don't affect the loop uniformity.
312f2ec16ccSHideki Saito //
313f2ec16ccSHideki Saito // NOTE: We decided to keep all these checks and its associated documentation
314f2ec16ccSHideki Saito // together so that we can easily have a picture of the current supported loop
315f2ec16ccSHideki Saito // nests. However, some of the current checks don't depend on \p OuterLp and
316f2ec16ccSHideki Saito // would be redundantly executed for each \p Lp if we invoked this function for
317f2ec16ccSHideki Saito // different candidate outer loops. This is not the case for now because we
318f2ec16ccSHideki Saito // don't currently have the infrastructure to evaluate multiple candidate outer
319f2ec16ccSHideki Saito // loops and \p OuterLp will be a fixed parameter while we only support explicit
320f2ec16ccSHideki Saito // outer loop vectorization. It's also very likely that these checks go away
321f2ec16ccSHideki Saito // before introducing the aforementioned infrastructure. However, if this is not
322f2ec16ccSHideki Saito // the case, we should move the \p OuterLp independent checks to a separate
323f2ec16ccSHideki Saito // function that is only executed once for each \p Lp.
324f2ec16ccSHideki Saito static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
325f2ec16ccSHideki Saito   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
326f2ec16ccSHideki Saito 
327f2ec16ccSHideki Saito   // If Lp is the outer loop, it's uniform by definition.
328f2ec16ccSHideki Saito   if (Lp == OuterLp)
329f2ec16ccSHideki Saito     return true;
330f2ec16ccSHideki Saito   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
331f2ec16ccSHideki Saito 
332f2ec16ccSHideki Saito   // 1.
333f2ec16ccSHideki Saito   PHINode *IV = Lp->getCanonicalInductionVariable();
334f2ec16ccSHideki Saito   if (!IV) {
335d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
336f2ec16ccSHideki Saito     return false;
337f2ec16ccSHideki Saito   }
338f2ec16ccSHideki Saito 
339f2ec16ccSHideki Saito   // 2.
340f2ec16ccSHideki Saito   BasicBlock *Latch = Lp->getLoopLatch();
341f2ec16ccSHideki Saito   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
342f2ec16ccSHideki Saito   if (!LatchBr || LatchBr->isUnconditional()) {
343d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
344f2ec16ccSHideki Saito     return false;
345f2ec16ccSHideki Saito   }
346f2ec16ccSHideki Saito 
347f2ec16ccSHideki Saito   // 3.
348f2ec16ccSHideki Saito   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
349f2ec16ccSHideki Saito   if (!LatchCmp) {
350d34e60caSNicola Zaghen     LLVM_DEBUG(
351d34e60caSNicola Zaghen         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
352f2ec16ccSHideki Saito     return false;
353f2ec16ccSHideki Saito   }
354f2ec16ccSHideki Saito 
355f2ec16ccSHideki Saito   Value *CondOp0 = LatchCmp->getOperand(0);
356f2ec16ccSHideki Saito   Value *CondOp1 = LatchCmp->getOperand(1);
357f2ec16ccSHideki Saito   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
358f2ec16ccSHideki Saito   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
359f2ec16ccSHideki Saito       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
360d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
361f2ec16ccSHideki Saito     return false;
362f2ec16ccSHideki Saito   }
363f2ec16ccSHideki Saito 
364f2ec16ccSHideki Saito   return true;
365f2ec16ccSHideki Saito }
366f2ec16ccSHideki Saito 
367f2ec16ccSHideki Saito // Return true if \p Lp and all its nested loops are uniform with regard to \p
368f2ec16ccSHideki Saito // OuterLp.
369f2ec16ccSHideki Saito static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
370f2ec16ccSHideki Saito   if (!isUniformLoop(Lp, OuterLp))
371f2ec16ccSHideki Saito     return false;
372f2ec16ccSHideki Saito 
373f2ec16ccSHideki Saito   // Check if nested loops are uniform.
374f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
375f2ec16ccSHideki Saito     if (!isUniformLoopNest(SubLp, OuterLp))
376f2ec16ccSHideki Saito       return false;
377f2ec16ccSHideki Saito 
378f2ec16ccSHideki Saito   return true;
379f2ec16ccSHideki Saito }
380f2ec16ccSHideki Saito 
3815f8f34e4SAdrian Prantl /// Check whether it is safe to if-convert this phi node.
382f2ec16ccSHideki Saito ///
383f2ec16ccSHideki Saito /// Phi nodes with constant expressions that can trap are not safe to if
384f2ec16ccSHideki Saito /// convert.
385f2ec16ccSHideki Saito static bool canIfConvertPHINodes(BasicBlock *BB) {
386f2ec16ccSHideki Saito   for (PHINode &Phi : BB->phis()) {
387f2ec16ccSHideki Saito     for (Value *V : Phi.incoming_values())
388f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(V))
389f2ec16ccSHideki Saito         if (C->canTrap())
390f2ec16ccSHideki Saito           return false;
391f2ec16ccSHideki Saito   }
392f2ec16ccSHideki Saito   return true;
393f2ec16ccSHideki Saito }
394f2ec16ccSHideki Saito 
395f2ec16ccSHideki Saito static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
396f2ec16ccSHideki Saito   if (Ty->isPointerTy())
397f2ec16ccSHideki Saito     return DL.getIntPtrType(Ty);
398f2ec16ccSHideki Saito 
399f2ec16ccSHideki Saito   // It is possible that char's or short's overflow when we ask for the loop's
400f2ec16ccSHideki Saito   // trip count, work around this by changing the type size.
401f2ec16ccSHideki Saito   if (Ty->getScalarSizeInBits() < 32)
402f2ec16ccSHideki Saito     return Type::getInt32Ty(Ty->getContext());
403f2ec16ccSHideki Saito 
404f2ec16ccSHideki Saito   return Ty;
405f2ec16ccSHideki Saito }
406f2ec16ccSHideki Saito 
407f2ec16ccSHideki Saito static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
408f2ec16ccSHideki Saito   Ty0 = convertPointerToIntegerType(DL, Ty0);
409f2ec16ccSHideki Saito   Ty1 = convertPointerToIntegerType(DL, Ty1);
410f2ec16ccSHideki Saito   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
411f2ec16ccSHideki Saito     return Ty0;
412f2ec16ccSHideki Saito   return Ty1;
413f2ec16ccSHideki Saito }
414f2ec16ccSHideki Saito 
4155f8f34e4SAdrian Prantl /// Check that the instruction has outside loop users and is not an
416f2ec16ccSHideki Saito /// identified reduction variable.
417f2ec16ccSHideki Saito static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
418f2ec16ccSHideki Saito                                SmallPtrSetImpl<Value *> &AllowedExit) {
41960a1e4ddSAnna Thomas   // Reductions, Inductions and non-header phis are allowed to have exit users. All
420f2ec16ccSHideki Saito   // other instructions must not have external users.
421f2ec16ccSHideki Saito   if (!AllowedExit.count(Inst))
422f2ec16ccSHideki Saito     // Check that all of the users of the loop are inside the BB.
423f2ec16ccSHideki Saito     for (User *U : Inst->users()) {
424f2ec16ccSHideki Saito       Instruction *UI = cast<Instruction>(U);
425f2ec16ccSHideki Saito       // This user may be a reduction exit value.
426f2ec16ccSHideki Saito       if (!TheLoop->contains(UI)) {
427d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
428f2ec16ccSHideki Saito         return true;
429f2ec16ccSHideki Saito       }
430f2ec16ccSHideki Saito     }
431f2ec16ccSHideki Saito   return false;
432f2ec16ccSHideki Saito }
433f2ec16ccSHideki Saito 
434f2ec16ccSHideki Saito int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) {
435f2ec16ccSHideki Saito   const ValueToValueMap &Strides =
436f2ec16ccSHideki Saito       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
437f2ec16ccSHideki Saito 
438f2ec16ccSHideki Saito   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, true, false);
439f2ec16ccSHideki Saito   if (Stride == 1 || Stride == -1)
440f2ec16ccSHideki Saito     return Stride;
441f2ec16ccSHideki Saito   return 0;
442f2ec16ccSHideki Saito }
443f2ec16ccSHideki Saito 
444f2ec16ccSHideki Saito bool LoopVectorizationLegality::isUniform(Value *V) {
445f2ec16ccSHideki Saito   return LAI->isUniform(V);
446f2ec16ccSHideki Saito }
447f2ec16ccSHideki Saito 
4489e97caf5SRenato Golin void LoopVectorizationLegality::reportVectorizationFailure(
4499e97caf5SRenato Golin     const StringRef DebugMsg, const StringRef OREMsg,
4509e97caf5SRenato Golin     const StringRef ORETag, Instruction *I) const {
4519e97caf5SRenato Golin   LLVM_DEBUG(debugVectorizationFailure(DebugMsg, I));
4529e97caf5SRenato Golin   ORE->emit(createLVMissedAnalysis(Hints->vectorizeAnalysisPassName(),
4539e97caf5SRenato Golin       ORETag, TheLoop, I) << OREMsg);
4549e97caf5SRenato Golin }
4559e97caf5SRenato Golin 
456f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeOuterLoop() {
457f2ec16ccSHideki Saito   assert(!TheLoop->empty() && "We are not vectorizing an outer loop.");
458f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
459f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
460f2ec16ccSHideki Saito   bool Result = true;
461f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
462f2ec16ccSHideki Saito 
463f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
464f2ec16ccSHideki Saito     // Check whether the BB terminator is a BranchInst. Any other terminator is
465f2ec16ccSHideki Saito     // not supported yet.
466f2ec16ccSHideki Saito     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
467f2ec16ccSHideki Saito     if (!Br) {
4689e97caf5SRenato Golin       reportVectorizationFailure("Unsupported basic block terminator",
4699e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
4709e97caf5SRenato Golin           "CFGNotUnderstood");
471f2ec16ccSHideki Saito       if (DoExtraAnalysis)
472f2ec16ccSHideki Saito         Result = false;
473f2ec16ccSHideki Saito       else
474f2ec16ccSHideki Saito         return false;
475f2ec16ccSHideki Saito     }
476f2ec16ccSHideki Saito 
477f2ec16ccSHideki Saito     // Check whether the BranchInst is a supported one. Only unconditional
478f2ec16ccSHideki Saito     // branches, conditional branches with an outer loop invariant condition or
479f2ec16ccSHideki Saito     // backedges are supported.
4804e4ecae0SHideki Saito     // FIXME: We skip these checks when VPlan predication is enabled as we
4814e4ecae0SHideki Saito     // want to allow divergent branches. This whole check will be removed
4824e4ecae0SHideki Saito     // once VPlan predication is on by default.
4834e4ecae0SHideki Saito     if (!EnableVPlanPredication && Br && Br->isConditional() &&
484f2ec16ccSHideki Saito         !TheLoop->isLoopInvariant(Br->getCondition()) &&
485f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(0)) &&
486f2ec16ccSHideki Saito         !LI->isLoopHeader(Br->getSuccessor(1))) {
4879e97caf5SRenato Golin       reportVectorizationFailure("Unsupported conditional branch",
4889e97caf5SRenato Golin           "loop control flow is not understood by vectorizer",
4899e97caf5SRenato Golin           "CFGNotUnderstood");
490f2ec16ccSHideki Saito       if (DoExtraAnalysis)
491f2ec16ccSHideki Saito         Result = false;
492f2ec16ccSHideki Saito       else
493f2ec16ccSHideki Saito         return false;
494f2ec16ccSHideki Saito     }
495f2ec16ccSHideki Saito   }
496f2ec16ccSHideki Saito 
497f2ec16ccSHideki Saito   // Check whether inner loops are uniform. At this point, we only support
498f2ec16ccSHideki Saito   // simple outer loops scenarios with uniform nested loops.
499f2ec16ccSHideki Saito   if (!isUniformLoopNest(TheLoop /*loop nest*/,
500f2ec16ccSHideki Saito                          TheLoop /*context outer loop*/)) {
5019e97caf5SRenato Golin     reportVectorizationFailure("Outer loop contains divergent loops",
5029e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
5039e97caf5SRenato Golin         "CFGNotUnderstood");
504f2ec16ccSHideki Saito     if (DoExtraAnalysis)
505f2ec16ccSHideki Saito       Result = false;
506f2ec16ccSHideki Saito     else
507f2ec16ccSHideki Saito       return false;
508f2ec16ccSHideki Saito   }
509f2ec16ccSHideki Saito 
510ea7f3035SHideki Saito   // Check whether we are able to set up outer loop induction.
511ea7f3035SHideki Saito   if (!setupOuterLoopInductions()) {
5129e97caf5SRenato Golin     reportVectorizationFailure("Unsupported outer loop Phi(s)",
5139e97caf5SRenato Golin                                "Unsupported outer loop Phi(s)",
5149e97caf5SRenato Golin                                "UnsupportedPhi");
515ea7f3035SHideki Saito     if (DoExtraAnalysis)
516ea7f3035SHideki Saito       Result = false;
517ea7f3035SHideki Saito     else
518ea7f3035SHideki Saito       return false;
519ea7f3035SHideki Saito   }
520ea7f3035SHideki Saito 
521f2ec16ccSHideki Saito   return Result;
522f2ec16ccSHideki Saito }
523f2ec16ccSHideki Saito 
524f2ec16ccSHideki Saito void LoopVectorizationLegality::addInductionPhi(
525f2ec16ccSHideki Saito     PHINode *Phi, const InductionDescriptor &ID,
526f2ec16ccSHideki Saito     SmallPtrSetImpl<Value *> &AllowedExit) {
527f2ec16ccSHideki Saito   Inductions[Phi] = ID;
528f2ec16ccSHideki Saito 
529f2ec16ccSHideki Saito   // In case this induction also comes with casts that we know we can ignore
530f2ec16ccSHideki Saito   // in the vectorized loop body, record them here. All casts could be recorded
531f2ec16ccSHideki Saito   // here for ignoring, but suffices to record only the first (as it is the
532f2ec16ccSHideki Saito   // only one that may bw used outside the cast sequence).
533f2ec16ccSHideki Saito   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
534f2ec16ccSHideki Saito   if (!Casts.empty())
535f2ec16ccSHideki Saito     InductionCastsToIgnore.insert(*Casts.begin());
536f2ec16ccSHideki Saito 
537f2ec16ccSHideki Saito   Type *PhiTy = Phi->getType();
538f2ec16ccSHideki Saito   const DataLayout &DL = Phi->getModule()->getDataLayout();
539f2ec16ccSHideki Saito 
540f2ec16ccSHideki Saito   // Get the widest type.
541f2ec16ccSHideki Saito   if (!PhiTy->isFloatingPointTy()) {
542f2ec16ccSHideki Saito     if (!WidestIndTy)
543f2ec16ccSHideki Saito       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
544f2ec16ccSHideki Saito     else
545f2ec16ccSHideki Saito       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
546f2ec16ccSHideki Saito   }
547f2ec16ccSHideki Saito 
548f2ec16ccSHideki Saito   // Int inductions are special because we only allow one IV.
549f2ec16ccSHideki Saito   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
550f2ec16ccSHideki Saito       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
551f2ec16ccSHideki Saito       isa<Constant>(ID.getStartValue()) &&
552f2ec16ccSHideki Saito       cast<Constant>(ID.getStartValue())->isNullValue()) {
553f2ec16ccSHideki Saito 
554f2ec16ccSHideki Saito     // Use the phi node with the widest type as induction. Use the last
555f2ec16ccSHideki Saito     // one if there are multiple (no good reason for doing this other
556f2ec16ccSHideki Saito     // than it is expedient). We've checked that it begins at zero and
557f2ec16ccSHideki Saito     // steps by one, so this is a canonical induction variable.
558f2ec16ccSHideki Saito     if (!PrimaryInduction || PhiTy == WidestIndTy)
559f2ec16ccSHideki Saito       PrimaryInduction = Phi;
560f2ec16ccSHideki Saito   }
561f2ec16ccSHideki Saito 
562f2ec16ccSHideki Saito   // Both the PHI node itself, and the "post-increment" value feeding
563f2ec16ccSHideki Saito   // back into the PHI node may have external users.
564f2ec16ccSHideki Saito   // We can allow those uses, except if the SCEVs we have for them rely
565f2ec16ccSHideki Saito   // on predicates that only hold within the loop, since allowing the exit
5666a1dd77fSAnna Thomas   // currently means re-using this SCEV outside the loop (see PR33706 for more
5676a1dd77fSAnna Thomas   // details).
568f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().isAlwaysTrue()) {
569f2ec16ccSHideki Saito     AllowedExit.insert(Phi);
570f2ec16ccSHideki Saito     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
571f2ec16ccSHideki Saito   }
572f2ec16ccSHideki Saito 
573d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
574f2ec16ccSHideki Saito }
575f2ec16ccSHideki Saito 
576ea7f3035SHideki Saito bool LoopVectorizationLegality::setupOuterLoopInductions() {
577ea7f3035SHideki Saito   BasicBlock *Header = TheLoop->getHeader();
578ea7f3035SHideki Saito 
579ea7f3035SHideki Saito   // Returns true if a given Phi is a supported induction.
580ea7f3035SHideki Saito   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
581ea7f3035SHideki Saito     InductionDescriptor ID;
582ea7f3035SHideki Saito     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
583ea7f3035SHideki Saito         ID.getKind() == InductionDescriptor::IK_IntInduction) {
584ea7f3035SHideki Saito       addInductionPhi(&Phi, ID, AllowedExit);
585ea7f3035SHideki Saito       return true;
586ea7f3035SHideki Saito     } else {
587ea7f3035SHideki Saito       // Bail out for any Phi in the outer loop header that is not a supported
588ea7f3035SHideki Saito       // induction.
589ea7f3035SHideki Saito       LLVM_DEBUG(
590ea7f3035SHideki Saito           dbgs()
591ea7f3035SHideki Saito           << "LV: Found unsupported PHI for outer loop vectorization.\n");
592ea7f3035SHideki Saito       return false;
593ea7f3035SHideki Saito     }
594ea7f3035SHideki Saito   };
595ea7f3035SHideki Saito 
596ea7f3035SHideki Saito   if (llvm::all_of(Header->phis(), isSupportedPhi))
597ea7f3035SHideki Saito     return true;
598ea7f3035SHideki Saito   else
599ea7f3035SHideki Saito     return false;
600ea7f3035SHideki Saito }
601ea7f3035SHideki Saito 
602f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeInstrs() {
603f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
604f2ec16ccSHideki Saito 
605f2ec16ccSHideki Saito   // Look for the attribute signaling the absence of NaNs.
606f2ec16ccSHideki Saito   Function &F = *Header->getParent();
607f2ec16ccSHideki Saito   HasFunNoNaNAttr =
608f2ec16ccSHideki Saito       F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";
609f2ec16ccSHideki Saito 
610f2ec16ccSHideki Saito   // For each block in the loop.
611f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
612f2ec16ccSHideki Saito     // Scan the instructions in the block and look for hazards.
613f2ec16ccSHideki Saito     for (Instruction &I : *BB) {
614f2ec16ccSHideki Saito       if (auto *Phi = dyn_cast<PHINode>(&I)) {
615f2ec16ccSHideki Saito         Type *PhiTy = Phi->getType();
616f2ec16ccSHideki Saito         // Check that this PHI type is allowed.
617f2ec16ccSHideki Saito         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
618f2ec16ccSHideki Saito             !PhiTy->isPointerTy()) {
6199e97caf5SRenato Golin           reportVectorizationFailure("Found a non-int non-pointer PHI",
6209e97caf5SRenato Golin                                      "loop control flow is not understood by vectorizer",
6219e97caf5SRenato Golin                                      "CFGNotUnderstood");
622f2ec16ccSHideki Saito           return false;
623f2ec16ccSHideki Saito         }
624f2ec16ccSHideki Saito 
625f2ec16ccSHideki Saito         // If this PHINode is not in the header block, then we know that we
626f2ec16ccSHideki Saito         // can convert it to select during if-conversion. No need to check if
627f2ec16ccSHideki Saito         // the PHIs in this block are induction or reduction variables.
628f2ec16ccSHideki Saito         if (BB != Header) {
62960a1e4ddSAnna Thomas           // Non-header phi nodes that have outside uses can be vectorized. Add
63060a1e4ddSAnna Thomas           // them to the list of allowed exits.
63160a1e4ddSAnna Thomas           // Unsafe cyclic dependencies with header phis are identified during
63260a1e4ddSAnna Thomas           // legalization for reduction, induction and first order
63360a1e4ddSAnna Thomas           // recurrences.
634f2ec16ccSHideki Saito           continue;
635f2ec16ccSHideki Saito         }
636f2ec16ccSHideki Saito 
637f2ec16ccSHideki Saito         // We only allow if-converted PHIs with exactly two incoming values.
638f2ec16ccSHideki Saito         if (Phi->getNumIncomingValues() != 2) {
6399e97caf5SRenato Golin           reportVectorizationFailure("Found an invalid PHI",
6409e97caf5SRenato Golin               "loop control flow is not understood by vectorizer",
6419e97caf5SRenato Golin               "CFGNotUnderstood", Phi);
642f2ec16ccSHideki Saito           return false;
643f2ec16ccSHideki Saito         }
644f2ec16ccSHideki Saito 
645f2ec16ccSHideki Saito         RecurrenceDescriptor RedDes;
646f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
647f2ec16ccSHideki Saito                                                  DT)) {
648f2ec16ccSHideki Saito           if (RedDes.hasUnsafeAlgebra())
649f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
650f2ec16ccSHideki Saito           AllowedExit.insert(RedDes.getLoopExitInstr());
651f2ec16ccSHideki Saito           Reductions[Phi] = RedDes;
652f2ec16ccSHideki Saito           continue;
653f2ec16ccSHideki Saito         }
654f2ec16ccSHideki Saito 
655b02b0ad8SAnna Thomas         // TODO: Instead of recording the AllowedExit, it would be good to record the
656b02b0ad8SAnna Thomas         // complementary set: NotAllowedExit. These include (but may not be
657b02b0ad8SAnna Thomas         // limited to):
658b02b0ad8SAnna Thomas         // 1. Reduction phis as they represent the one-before-last value, which
659b02b0ad8SAnna Thomas         // is not available when vectorized
660b02b0ad8SAnna Thomas         // 2. Induction phis and increment when SCEV predicates cannot be used
661b02b0ad8SAnna Thomas         // outside the loop - see addInductionPhi
662b02b0ad8SAnna Thomas         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
663b02b0ad8SAnna Thomas         // outside the loop - see call to hasOutsideLoopUser in the non-phi
664b02b0ad8SAnna Thomas         // handling below
665b02b0ad8SAnna Thomas         // 4. FirstOrderRecurrence phis that can possibly be handled by
666b02b0ad8SAnna Thomas         // extraction.
667b02b0ad8SAnna Thomas         // By recording these, we can then reason about ways to vectorize each
668b02b0ad8SAnna Thomas         // of these NotAllowedExit.
669f2ec16ccSHideki Saito         InductionDescriptor ID;
670f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
671f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
672f2ec16ccSHideki Saito           if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr)
673f2ec16ccSHideki Saito             Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst());
674f2ec16ccSHideki Saito           continue;
675f2ec16ccSHideki Saito         }
676f2ec16ccSHideki Saito 
677f2ec16ccSHideki Saito         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
678f2ec16ccSHideki Saito                                                          SinkAfter, DT)) {
679f2ec16ccSHideki Saito           FirstOrderRecurrences.insert(Phi);
680f2ec16ccSHideki Saito           continue;
681f2ec16ccSHideki Saito         }
682f2ec16ccSHideki Saito 
683f2ec16ccSHideki Saito         // As a last resort, coerce the PHI to a AddRec expression
684f2ec16ccSHideki Saito         // and re-try classifying it a an induction PHI.
685f2ec16ccSHideki Saito         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
686f2ec16ccSHideki Saito           addInductionPhi(Phi, ID, AllowedExit);
687f2ec16ccSHideki Saito           continue;
688f2ec16ccSHideki Saito         }
689f2ec16ccSHideki Saito 
6909e97caf5SRenato Golin         reportVectorizationFailure("Found an unidentified PHI",
6919e97caf5SRenato Golin             "value that could not be identified as "
6929e97caf5SRenato Golin             "reduction is used outside the loop",
6939e97caf5SRenato Golin             "NonReductionValueUsedOutsideLoop", Phi);
694f2ec16ccSHideki Saito         return false;
695f2ec16ccSHideki Saito       } // end of PHI handling
696f2ec16ccSHideki Saito 
697f2ec16ccSHideki Saito       // We handle calls that:
698f2ec16ccSHideki Saito       //   * Are debug info intrinsics.
699f2ec16ccSHideki Saito       //   * Have a mapping to an IR intrinsic.
700f2ec16ccSHideki Saito       //   * Have a vector version available.
701f2ec16ccSHideki Saito       auto *CI = dyn_cast<CallInst>(&I);
702f2ec16ccSHideki Saito       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
703f2ec16ccSHideki Saito           !isa<DbgInfoIntrinsic>(CI) &&
704f2ec16ccSHideki Saito           !(CI->getCalledFunction() && TLI &&
705f2ec16ccSHideki Saito             TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) {
7067d65fe5cSSanjay Patel         // If the call is a recognized math libary call, it is likely that
7077d65fe5cSSanjay Patel         // we can vectorize it given loosened floating-point constraints.
7087d65fe5cSSanjay Patel         LibFunc Func;
7097d65fe5cSSanjay Patel         bool IsMathLibCall =
7107d65fe5cSSanjay Patel             TLI && CI->getCalledFunction() &&
7117d65fe5cSSanjay Patel             CI->getType()->isFloatingPointTy() &&
7127d65fe5cSSanjay Patel             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
7137d65fe5cSSanjay Patel             TLI->hasOptimizedCodeGen(Func);
7147d65fe5cSSanjay Patel 
7157d65fe5cSSanjay Patel         if (IsMathLibCall) {
7167d65fe5cSSanjay Patel           // TODO: Ideally, we should not use clang-specific language here,
7177d65fe5cSSanjay Patel           // but it's hard to provide meaningful yet generic advice.
7187d65fe5cSSanjay Patel           // Also, should this be guarded by allowExtraAnalysis() and/or be part
7197d65fe5cSSanjay Patel           // of the returned info from isFunctionVectorizable()?
7209e97caf5SRenato Golin           reportVectorizationFailure("Found a non-intrinsic callsite",
7219e97caf5SRenato Golin               "library call cannot be vectorized. "
7227d65fe5cSSanjay Patel               "Try compiling with -fno-math-errno, -ffast-math, "
7239e97caf5SRenato Golin               "or similar flags",
7249e97caf5SRenato Golin               "CantVectorizeLibcall", CI);
7257d65fe5cSSanjay Patel         } else {
7269e97caf5SRenato Golin           reportVectorizationFailure("Found a non-intrinsic callsite",
7279e97caf5SRenato Golin                                      "call instruction cannot be vectorized",
7289e97caf5SRenato Golin                                      "CantVectorizeLibcall", CI);
7297d65fe5cSSanjay Patel         }
730f2ec16ccSHideki Saito         return false;
731f2ec16ccSHideki Saito       }
732f2ec16ccSHideki Saito 
733a066f1f9SSimon Pilgrim       // Some intrinsics have scalar arguments and should be same in order for
734a066f1f9SSimon Pilgrim       // them to be vectorized (i.e. loop invariant).
735a066f1f9SSimon Pilgrim       if (CI) {
736f2ec16ccSHideki Saito         auto *SE = PSE.getSE();
737a066f1f9SSimon Pilgrim         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
738a066f1f9SSimon Pilgrim         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
739a066f1f9SSimon Pilgrim           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
740a066f1f9SSimon Pilgrim             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
7419e97caf5SRenato Golin               reportVectorizationFailure("Found unvectorizable intrinsic",
7429e97caf5SRenato Golin                   "intrinsic instruction cannot be vectorized",
7439e97caf5SRenato Golin                   "CantVectorizeIntrinsic", CI);
744f2ec16ccSHideki Saito               return false;
745f2ec16ccSHideki Saito             }
746f2ec16ccSHideki Saito           }
747a066f1f9SSimon Pilgrim       }
748f2ec16ccSHideki Saito 
749f2ec16ccSHideki Saito       // Check that the instruction return type is vectorizable.
750f2ec16ccSHideki Saito       // Also, we can't vectorize extractelement instructions.
751f2ec16ccSHideki Saito       if ((!VectorType::isValidElementType(I.getType()) &&
752f2ec16ccSHideki Saito            !I.getType()->isVoidTy()) ||
753f2ec16ccSHideki Saito           isa<ExtractElementInst>(I)) {
7549e97caf5SRenato Golin         reportVectorizationFailure("Found unvectorizable type",
7559e97caf5SRenato Golin             "instruction return type cannot be vectorized",
7569e97caf5SRenato Golin             "CantVectorizeInstructionReturnType", &I);
757f2ec16ccSHideki Saito         return false;
758f2ec16ccSHideki Saito       }
759f2ec16ccSHideki Saito 
760f2ec16ccSHideki Saito       // Check that the stored type is vectorizable.
761f2ec16ccSHideki Saito       if (auto *ST = dyn_cast<StoreInst>(&I)) {
762f2ec16ccSHideki Saito         Type *T = ST->getValueOperand()->getType();
763f2ec16ccSHideki Saito         if (!VectorType::isValidElementType(T)) {
7649e97caf5SRenato Golin           reportVectorizationFailure("Store instruction cannot be vectorized",
7659e97caf5SRenato Golin                                      "store instruction cannot be vectorized",
7669e97caf5SRenato Golin                                      "CantVectorizeStore", ST);
767f2ec16ccSHideki Saito           return false;
768f2ec16ccSHideki Saito         }
769f2ec16ccSHideki Saito 
770f2ec16ccSHideki Saito         // FP instructions can allow unsafe algebra, thus vectorizable by
771f2ec16ccSHideki Saito         // non-IEEE-754 compliant SIMD units.
772f2ec16ccSHideki Saito         // This applies to floating-point math operations and calls, not memory
773f2ec16ccSHideki Saito         // operations, shuffles, or casts, as they don't change precision or
774f2ec16ccSHideki Saito         // semantics.
775f2ec16ccSHideki Saito       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
776f2ec16ccSHideki Saito                  !I.isFast()) {
777d34e60caSNicola Zaghen         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
778f2ec16ccSHideki Saito         Hints->setPotentiallyUnsafe();
779f2ec16ccSHideki Saito       }
780f2ec16ccSHideki Saito 
781f2ec16ccSHideki Saito       // Reduction instructions are allowed to have exit users.
782f2ec16ccSHideki Saito       // All other instructions must not have external users.
783f2ec16ccSHideki Saito       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
784b02b0ad8SAnna Thomas         // We can safely vectorize loops where instructions within the loop are
785b02b0ad8SAnna Thomas         // used outside the loop only if the SCEV predicates within the loop is
786b02b0ad8SAnna Thomas         // same as outside the loop. Allowing the exit means reusing the SCEV
787b02b0ad8SAnna Thomas         // outside the loop.
788b02b0ad8SAnna Thomas         if (PSE.getUnionPredicate().isAlwaysTrue()) {
789b02b0ad8SAnna Thomas           AllowedExit.insert(&I);
790b02b0ad8SAnna Thomas           continue;
791b02b0ad8SAnna Thomas         }
7929e97caf5SRenato Golin         reportVectorizationFailure("Value cannot be used outside the loop",
7939e97caf5SRenato Golin                                    "value cannot be used outside the loop",
7949e97caf5SRenato Golin                                    "ValueUsedOutsideLoop", &I);
795f2ec16ccSHideki Saito         return false;
796f2ec16ccSHideki Saito       }
797f2ec16ccSHideki Saito     } // next instr.
798f2ec16ccSHideki Saito   }
799f2ec16ccSHideki Saito 
800f2ec16ccSHideki Saito   if (!PrimaryInduction) {
801f2ec16ccSHideki Saito     if (Inductions.empty()) {
8029e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8039e97caf5SRenato Golin           "loop induction variable could not be identified",
8049e97caf5SRenato Golin           "NoInductionVariable");
805f2ec16ccSHideki Saito       return false;
8064f27730eSWarren Ristow     } else if (!WidestIndTy) {
8079e97caf5SRenato Golin       reportVectorizationFailure("Did not find one integer induction var",
8089e97caf5SRenato Golin           "integer loop induction variable could not be identified",
8099e97caf5SRenato Golin           "NoIntegerInductionVariable");
8104f27730eSWarren Ristow       return false;
8119e97caf5SRenato Golin     } else {
8129e97caf5SRenato Golin       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
813f2ec16ccSHideki Saito     }
814f2ec16ccSHideki Saito   }
815f2ec16ccSHideki Saito 
816f2ec16ccSHideki Saito   // Now we know the widest induction type, check if our found induction
817f2ec16ccSHideki Saito   // is the same size. If it's not, unset it here and InnerLoopVectorizer
818f2ec16ccSHideki Saito   // will create another.
819f2ec16ccSHideki Saito   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
820f2ec16ccSHideki Saito     PrimaryInduction = nullptr;
821f2ec16ccSHideki Saito 
822f2ec16ccSHideki Saito   return true;
823f2ec16ccSHideki Saito }
824f2ec16ccSHideki Saito 
825f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeMemory() {
826f2ec16ccSHideki Saito   LAI = &(*GetLAA)(*TheLoop);
827f2ec16ccSHideki Saito   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
828f2ec16ccSHideki Saito   if (LAR) {
829f2ec16ccSHideki Saito     ORE->emit([&]() {
830f2ec16ccSHideki Saito       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
831f2ec16ccSHideki Saito                                         "loop not vectorized: ", *LAR);
832f2ec16ccSHideki Saito     });
833f2ec16ccSHideki Saito   }
834f2ec16ccSHideki Saito   if (!LAI->canVectorizeMemory())
835f2ec16ccSHideki Saito     return false;
836f2ec16ccSHideki Saito 
8375e9215f0SAnna Thomas   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
8389e97caf5SRenato Golin     reportVectorizationFailure("Stores to a uniform address",
8399e97caf5SRenato Golin         "write to a loop invariant address could not be vectorized",
8409e97caf5SRenato Golin         "CantVectorizeStoreToLoopInvariantAddress");
841f2ec16ccSHideki Saito     return false;
842f2ec16ccSHideki Saito   }
843f2ec16ccSHideki Saito   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
844f2ec16ccSHideki Saito   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
845f2ec16ccSHideki Saito 
846f2ec16ccSHideki Saito   return true;
847f2ec16ccSHideki Saito }
848f2ec16ccSHideki Saito 
849f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
850f2ec16ccSHideki Saito   Value *In0 = const_cast<Value *>(V);
851f2ec16ccSHideki Saito   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
852f2ec16ccSHideki Saito   if (!PN)
853f2ec16ccSHideki Saito     return false;
854f2ec16ccSHideki Saito 
855f2ec16ccSHideki Saito   return Inductions.count(PN);
856f2ec16ccSHideki Saito }
857f2ec16ccSHideki Saito 
858f2ec16ccSHideki Saito bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
859f2ec16ccSHideki Saito   auto *Inst = dyn_cast<Instruction>(V);
860f2ec16ccSHideki Saito   return (Inst && InductionCastsToIgnore.count(Inst));
861f2ec16ccSHideki Saito }
862f2ec16ccSHideki Saito 
863f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
864f2ec16ccSHideki Saito   return isInductionPhi(V) || isCastedInductionVariable(V);
865f2ec16ccSHideki Saito }
866f2ec16ccSHideki Saito 
867f2ec16ccSHideki Saito bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
868f2ec16ccSHideki Saito   return FirstOrderRecurrences.count(Phi);
869f2ec16ccSHideki Saito }
870f2ec16ccSHideki Saito 
871f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) {
872f2ec16ccSHideki Saito   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
873f2ec16ccSHideki Saito }
874f2ec16ccSHideki Saito 
875f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockCanBePredicated(
876f2ec16ccSHideki Saito     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs) {
877f2ec16ccSHideki Saito   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
878f2ec16ccSHideki Saito 
879f2ec16ccSHideki Saito   for (Instruction &I : *BB) {
880f2ec16ccSHideki Saito     // Check that we don't have a constant expression that can trap as operand.
881f2ec16ccSHideki Saito     for (Value *Operand : I.operands()) {
882f2ec16ccSHideki Saito       if (auto *C = dyn_cast<Constant>(Operand))
883f2ec16ccSHideki Saito         if (C->canTrap())
884f2ec16ccSHideki Saito           return false;
885f2ec16ccSHideki Saito     }
886f2ec16ccSHideki Saito     // We might be able to hoist the load.
887f2ec16ccSHideki Saito     if (I.mayReadFromMemory()) {
888f2ec16ccSHideki Saito       auto *LI = dyn_cast<LoadInst>(&I);
889f2ec16ccSHideki Saito       if (!LI)
890f2ec16ccSHideki Saito         return false;
891f2ec16ccSHideki Saito       if (!SafePtrs.count(LI->getPointerOperand())) {
892f2ec16ccSHideki Saito         // !llvm.mem.parallel_loop_access implies if-conversion safety.
893f2ec16ccSHideki Saito         // Otherwise, record that the load needs (real or emulated) masking
894f2ec16ccSHideki Saito         // and let the cost model decide.
895f2ec16ccSHideki Saito         if (!IsAnnotatedParallel)
896f2ec16ccSHideki Saito           MaskedOp.insert(LI);
897f2ec16ccSHideki Saito         continue;
898f2ec16ccSHideki Saito       }
899f2ec16ccSHideki Saito     }
900f2ec16ccSHideki Saito 
901f2ec16ccSHideki Saito     if (I.mayWriteToMemory()) {
902f2ec16ccSHideki Saito       auto *SI = dyn_cast<StoreInst>(&I);
903f2ec16ccSHideki Saito       if (!SI)
904f2ec16ccSHideki Saito         return false;
905f2ec16ccSHideki Saito       // Predicated store requires some form of masking:
906f2ec16ccSHideki Saito       // 1) masked store HW instruction,
907f2ec16ccSHideki Saito       // 2) emulation via load-blend-store (only if safe and legal to do so,
908f2ec16ccSHideki Saito       //    be aware on the race conditions), or
909f2ec16ccSHideki Saito       // 3) element-by-element predicate check and scalar store.
910f2ec16ccSHideki Saito       MaskedOp.insert(SI);
911f2ec16ccSHideki Saito       continue;
912f2ec16ccSHideki Saito     }
913f2ec16ccSHideki Saito     if (I.mayThrow())
914f2ec16ccSHideki Saito       return false;
915f2ec16ccSHideki Saito   }
916f2ec16ccSHideki Saito 
917f2ec16ccSHideki Saito   return true;
918f2ec16ccSHideki Saito }
919f2ec16ccSHideki Saito 
920f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
921f2ec16ccSHideki Saito   if (!EnableIfConversion) {
9229e97caf5SRenato Golin     reportVectorizationFailure("If-conversion is disabled",
9239e97caf5SRenato Golin                                "if-conversion is disabled",
9249e97caf5SRenato Golin                                "IfConversionDisabled");
925f2ec16ccSHideki Saito     return false;
926f2ec16ccSHideki Saito   }
927f2ec16ccSHideki Saito 
928f2ec16ccSHideki Saito   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
929f2ec16ccSHideki Saito 
930f2ec16ccSHideki Saito   // A list of pointers that we can safely read and write to.
931f2ec16ccSHideki Saito   SmallPtrSet<Value *, 8> SafePointes;
932f2ec16ccSHideki Saito 
933f2ec16ccSHideki Saito   // Collect safe addresses.
934f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
935f2ec16ccSHideki Saito     if (blockNeedsPredication(BB))
936f2ec16ccSHideki Saito       continue;
937f2ec16ccSHideki Saito 
938f2ec16ccSHideki Saito     for (Instruction &I : *BB)
939f2ec16ccSHideki Saito       if (auto *Ptr = getLoadStorePointerOperand(&I))
940f2ec16ccSHideki Saito         SafePointes.insert(Ptr);
941f2ec16ccSHideki Saito   }
942f2ec16ccSHideki Saito 
943f2ec16ccSHideki Saito   // Collect the blocks that need predication.
944f2ec16ccSHideki Saito   BasicBlock *Header = TheLoop->getHeader();
945f2ec16ccSHideki Saito   for (BasicBlock *BB : TheLoop->blocks()) {
946f2ec16ccSHideki Saito     // We don't support switch statements inside loops.
947f2ec16ccSHideki Saito     if (!isa<BranchInst>(BB->getTerminator())) {
9489e97caf5SRenato Golin       reportVectorizationFailure("Loop contains a switch statement",
9499e97caf5SRenato Golin                                  "loop contains a switch statement",
9509e97caf5SRenato Golin                                  "LoopContainsSwitch", BB->getTerminator());
951f2ec16ccSHideki Saito       return false;
952f2ec16ccSHideki Saito     }
953f2ec16ccSHideki Saito 
954f2ec16ccSHideki Saito     // We must be able to predicate all blocks that need to be predicated.
955f2ec16ccSHideki Saito     if (blockNeedsPredication(BB)) {
956f2ec16ccSHideki Saito       if (!blockCanBePredicated(BB, SafePointes)) {
9579e97caf5SRenato Golin         reportVectorizationFailure(
9589e97caf5SRenato Golin             "Control flow cannot be substituted for a select",
9599e97caf5SRenato Golin             "control flow cannot be substituted for a select",
9609e97caf5SRenato Golin             "NoCFGForSelect", BB->getTerminator());
961f2ec16ccSHideki Saito         return false;
962f2ec16ccSHideki Saito       }
963f2ec16ccSHideki Saito     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
9649e97caf5SRenato Golin       reportVectorizationFailure(
9659e97caf5SRenato Golin           "Control flow cannot be substituted for a select",
9669e97caf5SRenato Golin           "control flow cannot be substituted for a select",
9679e97caf5SRenato Golin           "NoCFGForSelect", BB->getTerminator());
968f2ec16ccSHideki Saito       return false;
969f2ec16ccSHideki Saito     }
970f2ec16ccSHideki Saito   }
971f2ec16ccSHideki Saito 
972f2ec16ccSHideki Saito   // We can if-convert this loop.
973f2ec16ccSHideki Saito   return true;
974f2ec16ccSHideki Saito }
975f2ec16ccSHideki Saito 
976f2ec16ccSHideki Saito // Helper function to canVectorizeLoopNestCFG.
977f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
978f2ec16ccSHideki Saito                                                     bool UseVPlanNativePath) {
979f2ec16ccSHideki Saito   assert((UseVPlanNativePath || Lp->empty()) &&
980f2ec16ccSHideki Saito          "VPlan-native path is not enabled.");
981f2ec16ccSHideki Saito 
982f2ec16ccSHideki Saito   // TODO: ORE should be improved to show more accurate information when an
983f2ec16ccSHideki Saito   // outer loop can't be vectorized because a nested loop is not understood or
984f2ec16ccSHideki Saito   // legal. Something like: "outer_loop_location: loop not vectorized:
985f2ec16ccSHideki Saito   // (inner_loop_location) loop control flow is not understood by vectorizer".
986f2ec16ccSHideki Saito 
987f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
988f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
989f2ec16ccSHideki Saito   bool Result = true;
990f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
991f2ec16ccSHideki Saito 
992f2ec16ccSHideki Saito   // We must have a loop in canonical form. Loops with indirectbr in them cannot
993f2ec16ccSHideki Saito   // be canonicalized.
994f2ec16ccSHideki Saito   if (!Lp->getLoopPreheader()) {
9959e97caf5SRenato Golin     reportVectorizationFailure("Loop doesn't have a legal pre-header",
9969e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
9979e97caf5SRenato Golin         "CFGNotUnderstood");
998f2ec16ccSHideki Saito     if (DoExtraAnalysis)
999f2ec16ccSHideki Saito       Result = false;
1000f2ec16ccSHideki Saito     else
1001f2ec16ccSHideki Saito       return false;
1002f2ec16ccSHideki Saito   }
1003f2ec16ccSHideki Saito 
1004f2ec16ccSHideki Saito   // We must have a single backedge.
1005f2ec16ccSHideki Saito   if (Lp->getNumBackEdges() != 1) {
10069e97caf5SRenato Golin     reportVectorizationFailure("The loop must have a single backedge",
10079e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
10089e97caf5SRenato Golin         "CFGNotUnderstood");
1009f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1010f2ec16ccSHideki Saito       Result = false;
1011f2ec16ccSHideki Saito     else
1012f2ec16ccSHideki Saito       return false;
1013f2ec16ccSHideki Saito   }
1014f2ec16ccSHideki Saito 
1015f2ec16ccSHideki Saito   // We must have a single exiting block.
1016f2ec16ccSHideki Saito   if (!Lp->getExitingBlock()) {
10179e97caf5SRenato Golin     reportVectorizationFailure("The loop must have an exiting block",
10189e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
10199e97caf5SRenato Golin         "CFGNotUnderstood");
1020f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1021f2ec16ccSHideki Saito       Result = false;
1022f2ec16ccSHideki Saito     else
1023f2ec16ccSHideki Saito       return false;
1024f2ec16ccSHideki Saito   }
1025f2ec16ccSHideki Saito 
1026f2ec16ccSHideki Saito   // We only handle bottom-tested loops, i.e. loop in which the condition is
1027f2ec16ccSHideki Saito   // checked at the end of each iteration. With that we can assume that all
1028f2ec16ccSHideki Saito   // instructions in the loop are executed the same number of times.
1029f2ec16ccSHideki Saito   if (Lp->getExitingBlock() != Lp->getLoopLatch()) {
10309e97caf5SRenato Golin     reportVectorizationFailure("The exiting block is not the loop latch",
10319e97caf5SRenato Golin         "loop control flow is not understood by vectorizer",
10329e97caf5SRenato Golin         "CFGNotUnderstood");
1033f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1034f2ec16ccSHideki Saito       Result = false;
1035f2ec16ccSHideki Saito     else
1036f2ec16ccSHideki Saito       return false;
1037f2ec16ccSHideki Saito   }
1038f2ec16ccSHideki Saito 
1039f2ec16ccSHideki Saito   return Result;
1040f2ec16ccSHideki Saito }
1041f2ec16ccSHideki Saito 
1042f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
1043f2ec16ccSHideki Saito     Loop *Lp, bool UseVPlanNativePath) {
1044f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1045f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1046f2ec16ccSHideki Saito   bool Result = true;
1047f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1048f2ec16ccSHideki Saito   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
1049f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1050f2ec16ccSHideki Saito       Result = false;
1051f2ec16ccSHideki Saito     else
1052f2ec16ccSHideki Saito       return false;
1053f2ec16ccSHideki Saito   }
1054f2ec16ccSHideki Saito 
1055f2ec16ccSHideki Saito   // Recursively check whether the loop control flow of nested loops is
1056f2ec16ccSHideki Saito   // understood.
1057f2ec16ccSHideki Saito   for (Loop *SubLp : *Lp)
1058f2ec16ccSHideki Saito     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
1059f2ec16ccSHideki Saito       if (DoExtraAnalysis)
1060f2ec16ccSHideki Saito         Result = false;
1061f2ec16ccSHideki Saito       else
1062f2ec16ccSHideki Saito         return false;
1063f2ec16ccSHideki Saito     }
1064f2ec16ccSHideki Saito 
1065f2ec16ccSHideki Saito   return Result;
1066f2ec16ccSHideki Saito }
1067f2ec16ccSHideki Saito 
1068f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
1069f2ec16ccSHideki Saito   // Store the result and return it at the end instead of exiting early, in case
1070f2ec16ccSHideki Saito   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
1071f2ec16ccSHideki Saito   bool Result = true;
1072f2ec16ccSHideki Saito 
1073f2ec16ccSHideki Saito   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
1074f2ec16ccSHideki Saito   // Check whether the loop-related control flow in the loop nest is expected by
1075f2ec16ccSHideki Saito   // vectorizer.
1076f2ec16ccSHideki Saito   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
1077f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1078f2ec16ccSHideki Saito       Result = false;
1079f2ec16ccSHideki Saito     else
1080f2ec16ccSHideki Saito       return false;
1081f2ec16ccSHideki Saito   }
1082f2ec16ccSHideki Saito 
1083f2ec16ccSHideki Saito   // We need to have a loop header.
1084d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
1085f2ec16ccSHideki Saito                     << '\n');
1086f2ec16ccSHideki Saito 
1087f2ec16ccSHideki Saito   // Specific checks for outer loops. We skip the remaining legal checks at this
1088f2ec16ccSHideki Saito   // point because they don't support outer loops.
1089f2ec16ccSHideki Saito   if (!TheLoop->empty()) {
1090f2ec16ccSHideki Saito     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
1091f2ec16ccSHideki Saito 
1092f2ec16ccSHideki Saito     if (!canVectorizeOuterLoop()) {
10939e97caf5SRenato Golin       reportVectorizationFailure("Unsupported outer loop",
10949e97caf5SRenato Golin                                  "unsupported outer loop",
10959e97caf5SRenato Golin                                  "UnsupportedOuterLoop");
1096f2ec16ccSHideki Saito       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
1097f2ec16ccSHideki Saito       // outer loops.
1098f2ec16ccSHideki Saito       return false;
1099f2ec16ccSHideki Saito     }
1100f2ec16ccSHideki Saito 
1101d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
1102f2ec16ccSHideki Saito     return Result;
1103f2ec16ccSHideki Saito   }
1104f2ec16ccSHideki Saito 
1105f2ec16ccSHideki Saito   assert(TheLoop->empty() && "Inner loop expected.");
1106f2ec16ccSHideki Saito   // Check if we can if-convert non-single-bb loops.
1107f2ec16ccSHideki Saito   unsigned NumBlocks = TheLoop->getNumBlocks();
1108f2ec16ccSHideki Saito   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
1109d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
1110f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1111f2ec16ccSHideki Saito       Result = false;
1112f2ec16ccSHideki Saito     else
1113f2ec16ccSHideki Saito       return false;
1114f2ec16ccSHideki Saito   }
1115f2ec16ccSHideki Saito 
1116f2ec16ccSHideki Saito   // Check if we can vectorize the instructions and CFG in this loop.
1117f2ec16ccSHideki Saito   if (!canVectorizeInstrs()) {
1118d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
1119f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1120f2ec16ccSHideki Saito       Result = false;
1121f2ec16ccSHideki Saito     else
1122f2ec16ccSHideki Saito       return false;
1123f2ec16ccSHideki Saito   }
1124f2ec16ccSHideki Saito 
1125f2ec16ccSHideki Saito   // Go over each instruction and look at memory deps.
1126f2ec16ccSHideki Saito   if (!canVectorizeMemory()) {
1127d34e60caSNicola Zaghen     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
1128f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1129f2ec16ccSHideki Saito       Result = false;
1130f2ec16ccSHideki Saito     else
1131f2ec16ccSHideki Saito       return false;
1132f2ec16ccSHideki Saito   }
1133f2ec16ccSHideki Saito 
1134d34e60caSNicola Zaghen   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
1135f2ec16ccSHideki Saito                     << (LAI->getRuntimePointerChecking()->Need
1136f2ec16ccSHideki Saito                             ? " (with a runtime bound check)"
1137f2ec16ccSHideki Saito                             : "")
1138f2ec16ccSHideki Saito                     << "!\n");
1139f2ec16ccSHideki Saito 
1140f2ec16ccSHideki Saito   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
1141f2ec16ccSHideki Saito   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
1142f2ec16ccSHideki Saito     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
1143f2ec16ccSHideki Saito 
1144f2ec16ccSHideki Saito   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
11459e97caf5SRenato Golin     reportVectorizationFailure("Too many SCEV checks needed",
11469e97caf5SRenato Golin         "Too many SCEV assumptions need to be made and checked at runtime",
11479e97caf5SRenato Golin         "TooManySCEVRunTimeChecks");
1148f2ec16ccSHideki Saito     if (DoExtraAnalysis)
1149f2ec16ccSHideki Saito       Result = false;
1150f2ec16ccSHideki Saito     else
1151f2ec16ccSHideki Saito       return false;
1152f2ec16ccSHideki Saito   }
1153f2ec16ccSHideki Saito 
1154f2ec16ccSHideki Saito   // Okay! We've done all the tests. If any have failed, return false. Otherwise
1155f2ec16ccSHideki Saito   // we can vectorize, and at this point we don't have any other mem analysis
1156f2ec16ccSHideki Saito   // which may limit our maximum vectorization factor, so just return true with
1157f2ec16ccSHideki Saito   // no restrictions.
1158f2ec16ccSHideki Saito   return Result;
1159f2ec16ccSHideki Saito }
1160f2ec16ccSHideki Saito 
1161b0b5312eSAyal Zaks bool LoopVectorizationLegality::canFoldTailByMasking() {
1162b0b5312eSAyal Zaks 
1163b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
1164b0b5312eSAyal Zaks 
1165b0b5312eSAyal Zaks   if (!PrimaryInduction) {
11669e97caf5SRenato Golin     reportVectorizationFailure(
11679e97caf5SRenato Golin         "No primary induction, cannot fold tail by masking",
11689e97caf5SRenato Golin         "Missing a primary induction variable in the loop, which is "
11699e97caf5SRenato Golin         "needed in order to fold tail by masking as required.",
11709e97caf5SRenato Golin         "NoPrimaryInduction");
1171b0b5312eSAyal Zaks     return false;
1172b0b5312eSAyal Zaks   }
1173b0b5312eSAyal Zaks 
1174b0b5312eSAyal Zaks   // TODO: handle reductions when tail is folded by masking.
1175b0b5312eSAyal Zaks   if (!Reductions.empty()) {
11769e97caf5SRenato Golin     reportVectorizationFailure(
11779e97caf5SRenato Golin         "Loop has reductions, cannot fold tail by masking",
11789e97caf5SRenato Golin         "Cannot fold tail by masking in the presence of reductions.",
11799e97caf5SRenato Golin         "ReductionFoldingTailByMasking");
1180b0b5312eSAyal Zaks     return false;
1181b0b5312eSAyal Zaks   }
1182b0b5312eSAyal Zaks 
1183b0b5312eSAyal Zaks   // TODO: handle outside users when tail is folded by masking.
1184b0b5312eSAyal Zaks   for (auto *AE : AllowedExit) {
1185b0b5312eSAyal Zaks     // Check that all users of allowed exit values are inside the loop.
1186b0b5312eSAyal Zaks     for (User *U : AE->users()) {
1187b0b5312eSAyal Zaks       Instruction *UI = cast<Instruction>(U);
1188b0b5312eSAyal Zaks       if (TheLoop->contains(UI))
1189b0b5312eSAyal Zaks         continue;
11909e97caf5SRenato Golin       reportVectorizationFailure(
11919e97caf5SRenato Golin           "Cannot fold tail by masking, loop has an outside user for",
11929e97caf5SRenato Golin           "Cannot fold tail by masking in the presence of live outs.",
11939e97caf5SRenato Golin           "LiveOutFoldingTailByMasking", UI);
1194b0b5312eSAyal Zaks       return false;
1195b0b5312eSAyal Zaks     }
1196b0b5312eSAyal Zaks   }
1197b0b5312eSAyal Zaks 
1198b0b5312eSAyal Zaks   // The list of pointers that we can safely read and write to remains empty.
1199b0b5312eSAyal Zaks   SmallPtrSet<Value *, 8> SafePointers;
1200b0b5312eSAyal Zaks 
1201b0b5312eSAyal Zaks   // Check and mark all blocks for predication, including those that ordinarily
1202b0b5312eSAyal Zaks   // do not need predication such as the header block.
1203b0b5312eSAyal Zaks   for (BasicBlock *BB : TheLoop->blocks()) {
1204b0b5312eSAyal Zaks     if (!blockCanBePredicated(BB, SafePointers)) {
12059e97caf5SRenato Golin       reportVectorizationFailure(
12069e97caf5SRenato Golin           "Cannot fold tail by masking as required",
12079e97caf5SRenato Golin           "control flow cannot be substituted for a select",
12089e97caf5SRenato Golin           "NoCFGForSelect", BB->getTerminator());
1209b0b5312eSAyal Zaks       return false;
1210b0b5312eSAyal Zaks     }
1211b0b5312eSAyal Zaks   }
1212b0b5312eSAyal Zaks 
1213b0b5312eSAyal Zaks   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1214b0b5312eSAyal Zaks   return true;
1215b0b5312eSAyal Zaks }
1216b0b5312eSAyal Zaks 
1217f2ec16ccSHideki Saito } // namespace llvm
1218