1f2ec16ccSHideki Saito //===- LoopVectorizationLegality.cpp --------------------------------------===// 2f2ec16ccSHideki Saito // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6f2ec16ccSHideki Saito // 7f2ec16ccSHideki Saito //===----------------------------------------------------------------------===// 8f2ec16ccSHideki Saito // 9f2ec16ccSHideki Saito // This file provides loop vectorization legality analysis. Original code 10f2ec16ccSHideki Saito // resided in LoopVectorize.cpp for a long time. 11f2ec16ccSHideki Saito // 12f2ec16ccSHideki Saito // At this point, it is implemented as a utility class, not as an analysis 13f2ec16ccSHideki Saito // pass. It should be easy to create an analysis pass around it if there 14f2ec16ccSHideki Saito // is a need (but D45420 needs to happen first). 15f2ec16ccSHideki Saito // 16f2ec16ccSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" 177403569bSPhilip Reames #include "llvm/Analysis/Loads.h" 187403569bSPhilip Reames #include "llvm/Analysis/ValueTracking.h" 19f2ec16ccSHideki Saito #include "llvm/Analysis/VectorUtils.h" 20f2ec16ccSHideki Saito #include "llvm/IR/IntrinsicInst.h" 2123c11380SFlorian Hahn #include "llvm/IR/PatternMatch.h" 2223c11380SFlorian Hahn #include "llvm/Transforms/Vectorize/LoopVectorize.h" 23f2ec16ccSHideki Saito 24f2ec16ccSHideki Saito using namespace llvm; 2523c11380SFlorian Hahn using namespace PatternMatch; 26f2ec16ccSHideki Saito 27f2ec16ccSHideki Saito #define LV_NAME "loop-vectorize" 28f2ec16ccSHideki Saito #define DEBUG_TYPE LV_NAME 29f2ec16ccSHideki Saito 304e4ecae0SHideki Saito extern cl::opt<bool> EnableVPlanPredication; 314e4ecae0SHideki Saito 32f2ec16ccSHideki Saito static cl::opt<bool> 33f2ec16ccSHideki Saito EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, 34f2ec16ccSHideki Saito cl::desc("Enable if-conversion during vectorization.")); 35f2ec16ccSHideki Saito 36f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( 37f2ec16ccSHideki Saito "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, 38f2ec16ccSHideki Saito cl::desc("The maximum allowed number of runtime memory checks with a " 39f2ec16ccSHideki Saito "vectorize(enable) pragma.")); 40f2ec16ccSHideki Saito 41f2ec16ccSHideki Saito static cl::opt<unsigned> VectorizeSCEVCheckThreshold( 42f2ec16ccSHideki Saito "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, 43f2ec16ccSHideki Saito cl::desc("The maximum number of SCEV checks allowed.")); 44f2ec16ccSHideki Saito 45f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold( 46f2ec16ccSHideki Saito "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, 47f2ec16ccSHideki Saito cl::desc("The maximum number of SCEV checks allowed with a " 48f2ec16ccSHideki Saito "vectorize(enable) pragma")); 49f2ec16ccSHideki Saito 50f2ec16ccSHideki Saito /// Maximum vectorization interleave count. 51f2ec16ccSHideki Saito static const unsigned MaxInterleaveFactor = 16; 52f2ec16ccSHideki Saito 53f2ec16ccSHideki Saito namespace llvm { 54f2ec16ccSHideki Saito 55f2ec16ccSHideki Saito bool LoopVectorizeHints::Hint::validate(unsigned Val) { 56f2ec16ccSHideki Saito switch (Kind) { 57f2ec16ccSHideki Saito case HK_WIDTH: 58f2ec16ccSHideki Saito return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; 59f2ec16ccSHideki Saito case HK_UNROLL: 60f2ec16ccSHideki Saito return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; 61f2ec16ccSHideki Saito case HK_FORCE: 62f2ec16ccSHideki Saito return (Val <= 1); 63f2ec16ccSHideki Saito case HK_ISVECTORIZED: 6420b198ecSSjoerd Meijer case HK_PREDICATE: 65f2ec16ccSHideki Saito return (Val == 0 || Val == 1); 66f2ec16ccSHideki Saito } 67f2ec16ccSHideki Saito return false; 68f2ec16ccSHideki Saito } 69f2ec16ccSHideki Saito 70d4eb13c8SMichael Kruse LoopVectorizeHints::LoopVectorizeHints(const Loop *L, 71d4eb13c8SMichael Kruse bool InterleaveOnlyWhenForced, 72f2ec16ccSHideki Saito OptimizationRemarkEmitter &ORE) 73f2ec16ccSHideki Saito : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), 74d4eb13c8SMichael Kruse Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), 75f2ec16ccSHideki Saito Force("vectorize.enable", FK_Undefined, HK_FORCE), 7620b198ecSSjoerd Meijer IsVectorized("isvectorized", 0, HK_ISVECTORIZED), 77cb47b878SSjoerd Meijer Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L), 7820b198ecSSjoerd Meijer ORE(ORE) { 79f2ec16ccSHideki Saito // Populate values with existing loop metadata. 80f2ec16ccSHideki Saito getHintsFromMetadata(); 81f2ec16ccSHideki Saito 82f2ec16ccSHideki Saito // force-vector-interleave overrides DisableInterleaving. 83f2ec16ccSHideki Saito if (VectorizerParams::isInterleaveForced()) 84f2ec16ccSHideki Saito Interleave.Value = VectorizerParams::VectorizationInterleave; 85f2ec16ccSHideki Saito 86f2ec16ccSHideki Saito if (IsVectorized.Value != 1) 87f2ec16ccSHideki Saito // If the vectorization width and interleaving count are both 1 then 88f2ec16ccSHideki Saito // consider the loop to have been already vectorized because there's 89f2ec16ccSHideki Saito // nothing more that we can do. 90f2ec16ccSHideki Saito IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; 91d4eb13c8SMichael Kruse LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() 92f2ec16ccSHideki Saito << "LV: Interleaving disabled by the pass manager\n"); 93f2ec16ccSHideki Saito } 94f2ec16ccSHideki Saito 9577a614a6SMichael Kruse void LoopVectorizeHints::setAlreadyVectorized() { 9677a614a6SMichael Kruse LLVMContext &Context = TheLoop->getHeader()->getContext(); 9777a614a6SMichael Kruse 9877a614a6SMichael Kruse MDNode *IsVectorizedMD = MDNode::get( 9977a614a6SMichael Kruse Context, 10077a614a6SMichael Kruse {MDString::get(Context, "llvm.loop.isvectorized"), 10177a614a6SMichael Kruse ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))}); 10277a614a6SMichael Kruse MDNode *LoopID = TheLoop->getLoopID(); 10377a614a6SMichael Kruse MDNode *NewLoopID = 10477a614a6SMichael Kruse makePostTransformationMetadata(Context, LoopID, 10577a614a6SMichael Kruse {Twine(Prefix(), "vectorize.").str(), 10677a614a6SMichael Kruse Twine(Prefix(), "interleave.").str()}, 10777a614a6SMichael Kruse {IsVectorizedMD}); 10877a614a6SMichael Kruse TheLoop->setLoopID(NewLoopID); 10977a614a6SMichael Kruse 11077a614a6SMichael Kruse // Update internal cache. 11177a614a6SMichael Kruse IsVectorized.Value = 1; 11277a614a6SMichael Kruse } 11377a614a6SMichael Kruse 114d4eb13c8SMichael Kruse bool LoopVectorizeHints::allowVectorization( 115d4eb13c8SMichael Kruse Function *F, Loop *L, bool VectorizeOnlyWhenForced) const { 116f2ec16ccSHideki Saito if (getForce() == LoopVectorizeHints::FK_Disabled) { 117d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); 118f2ec16ccSHideki Saito emitRemarkWithHints(); 119f2ec16ccSHideki Saito return false; 120f2ec16ccSHideki Saito } 121f2ec16ccSHideki Saito 122d4eb13c8SMichael Kruse if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) { 123d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); 124f2ec16ccSHideki Saito emitRemarkWithHints(); 125f2ec16ccSHideki Saito return false; 126f2ec16ccSHideki Saito } 127f2ec16ccSHideki Saito 128f2ec16ccSHideki Saito if (getIsVectorized() == 1) { 129d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); 130f2ec16ccSHideki Saito // FIXME: Add interleave.disable metadata. This will allow 131f2ec16ccSHideki Saito // vectorize.disable to be used without disabling the pass and errors 132f2ec16ccSHideki Saito // to differentiate between disabled vectorization and a width of 1. 133f2ec16ccSHideki Saito ORE.emit([&]() { 134f2ec16ccSHideki Saito return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), 135f2ec16ccSHideki Saito "AllDisabled", L->getStartLoc(), 136f2ec16ccSHideki Saito L->getHeader()) 137f2ec16ccSHideki Saito << "loop not vectorized: vectorization and interleaving are " 138f2ec16ccSHideki Saito "explicitly disabled, or the loop has already been " 139f2ec16ccSHideki Saito "vectorized"; 140f2ec16ccSHideki Saito }); 141f2ec16ccSHideki Saito return false; 142f2ec16ccSHideki Saito } 143f2ec16ccSHideki Saito 144f2ec16ccSHideki Saito return true; 145f2ec16ccSHideki Saito } 146f2ec16ccSHideki Saito 147f2ec16ccSHideki Saito void LoopVectorizeHints::emitRemarkWithHints() const { 148f2ec16ccSHideki Saito using namespace ore; 149f2ec16ccSHideki Saito 150f2ec16ccSHideki Saito ORE.emit([&]() { 151f2ec16ccSHideki Saito if (Force.Value == LoopVectorizeHints::FK_Disabled) 152f2ec16ccSHideki Saito return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", 153f2ec16ccSHideki Saito TheLoop->getStartLoc(), 154f2ec16ccSHideki Saito TheLoop->getHeader()) 155f2ec16ccSHideki Saito << "loop not vectorized: vectorization is explicitly disabled"; 156f2ec16ccSHideki Saito else { 157f2ec16ccSHideki Saito OptimizationRemarkMissed R(LV_NAME, "MissedDetails", 158f2ec16ccSHideki Saito TheLoop->getStartLoc(), TheLoop->getHeader()); 159f2ec16ccSHideki Saito R << "loop not vectorized"; 160f2ec16ccSHideki Saito if (Force.Value == LoopVectorizeHints::FK_Enabled) { 161f2ec16ccSHideki Saito R << " (Force=" << NV("Force", true); 162f2ec16ccSHideki Saito if (Width.Value != 0) 163f2ec16ccSHideki Saito R << ", Vector Width=" << NV("VectorWidth", Width.Value); 164f2ec16ccSHideki Saito if (Interleave.Value != 0) 165f2ec16ccSHideki Saito R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); 166f2ec16ccSHideki Saito R << ")"; 167f2ec16ccSHideki Saito } 168f2ec16ccSHideki Saito return R; 169f2ec16ccSHideki Saito } 170f2ec16ccSHideki Saito }); 171f2ec16ccSHideki Saito } 172f2ec16ccSHideki Saito 173f2ec16ccSHideki Saito const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { 174f2ec16ccSHideki Saito if (getWidth() == 1) 175f2ec16ccSHideki Saito return LV_NAME; 176f2ec16ccSHideki Saito if (getForce() == LoopVectorizeHints::FK_Disabled) 177f2ec16ccSHideki Saito return LV_NAME; 178f2ec16ccSHideki Saito if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) 179f2ec16ccSHideki Saito return LV_NAME; 180f2ec16ccSHideki Saito return OptimizationRemarkAnalysis::AlwaysPrint; 181f2ec16ccSHideki Saito } 182f2ec16ccSHideki Saito 183f2ec16ccSHideki Saito void LoopVectorizeHints::getHintsFromMetadata() { 184f2ec16ccSHideki Saito MDNode *LoopID = TheLoop->getLoopID(); 185f2ec16ccSHideki Saito if (!LoopID) 186f2ec16ccSHideki Saito return; 187f2ec16ccSHideki Saito 188f2ec16ccSHideki Saito // First operand should refer to the loop id itself. 189f2ec16ccSHideki Saito assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 190f2ec16ccSHideki Saito assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 191f2ec16ccSHideki Saito 192f2ec16ccSHideki Saito for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { 193f2ec16ccSHideki Saito const MDString *S = nullptr; 194f2ec16ccSHideki Saito SmallVector<Metadata *, 4> Args; 195f2ec16ccSHideki Saito 196f2ec16ccSHideki Saito // The expected hint is either a MDString or a MDNode with the first 197f2ec16ccSHideki Saito // operand a MDString. 198f2ec16ccSHideki Saito if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) { 199f2ec16ccSHideki Saito if (!MD || MD->getNumOperands() == 0) 200f2ec16ccSHideki Saito continue; 201f2ec16ccSHideki Saito S = dyn_cast<MDString>(MD->getOperand(0)); 202f2ec16ccSHideki Saito for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) 203f2ec16ccSHideki Saito Args.push_back(MD->getOperand(i)); 204f2ec16ccSHideki Saito } else { 205f2ec16ccSHideki Saito S = dyn_cast<MDString>(LoopID->getOperand(i)); 206f2ec16ccSHideki Saito assert(Args.size() == 0 && "too many arguments for MDString"); 207f2ec16ccSHideki Saito } 208f2ec16ccSHideki Saito 209f2ec16ccSHideki Saito if (!S) 210f2ec16ccSHideki Saito continue; 211f2ec16ccSHideki Saito 212f2ec16ccSHideki Saito // Check if the hint starts with the loop metadata prefix. 213f2ec16ccSHideki Saito StringRef Name = S->getString(); 214f2ec16ccSHideki Saito if (Args.size() == 1) 215f2ec16ccSHideki Saito setHint(Name, Args[0]); 216f2ec16ccSHideki Saito } 217f2ec16ccSHideki Saito } 218f2ec16ccSHideki Saito 219f2ec16ccSHideki Saito void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { 220f2ec16ccSHideki Saito if (!Name.startswith(Prefix())) 221f2ec16ccSHideki Saito return; 222f2ec16ccSHideki Saito Name = Name.substr(Prefix().size(), StringRef::npos); 223f2ec16ccSHideki Saito 224f2ec16ccSHideki Saito const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg); 225f2ec16ccSHideki Saito if (!C) 226f2ec16ccSHideki Saito return; 227f2ec16ccSHideki Saito unsigned Val = C->getZExtValue(); 228f2ec16ccSHideki Saito 22920b198ecSSjoerd Meijer Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; 230f2ec16ccSHideki Saito for (auto H : Hints) { 231f2ec16ccSHideki Saito if (Name == H->Name) { 232f2ec16ccSHideki Saito if (H->validate(Val)) 233f2ec16ccSHideki Saito H->Value = Val; 234f2ec16ccSHideki Saito else 235d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); 236f2ec16ccSHideki Saito break; 237f2ec16ccSHideki Saito } 238f2ec16ccSHideki Saito } 239f2ec16ccSHideki Saito } 240f2ec16ccSHideki Saito 241f2ec16ccSHideki Saito bool LoopVectorizationRequirements::doesNotMeet( 242f2ec16ccSHideki Saito Function *F, Loop *L, const LoopVectorizeHints &Hints) { 243f2ec16ccSHideki Saito const char *PassName = Hints.vectorizeAnalysisPassName(); 244f2ec16ccSHideki Saito bool Failed = false; 245f2ec16ccSHideki Saito if (UnsafeAlgebraInst && !Hints.allowReordering()) { 246f2ec16ccSHideki Saito ORE.emit([&]() { 247f2ec16ccSHideki Saito return OptimizationRemarkAnalysisFPCommute( 248f2ec16ccSHideki Saito PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(), 249f2ec16ccSHideki Saito UnsafeAlgebraInst->getParent()) 250f2ec16ccSHideki Saito << "loop not vectorized: cannot prove it is safe to reorder " 251f2ec16ccSHideki Saito "floating-point operations"; 252f2ec16ccSHideki Saito }); 253f2ec16ccSHideki Saito Failed = true; 254f2ec16ccSHideki Saito } 255f2ec16ccSHideki Saito 256f2ec16ccSHideki Saito // Test if runtime memcheck thresholds are exceeded. 257f2ec16ccSHideki Saito bool PragmaThresholdReached = 258f2ec16ccSHideki Saito NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; 259f2ec16ccSHideki Saito bool ThresholdReached = 260f2ec16ccSHideki Saito NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; 261f2ec16ccSHideki Saito if ((ThresholdReached && !Hints.allowReordering()) || 262f2ec16ccSHideki Saito PragmaThresholdReached) { 263f2ec16ccSHideki Saito ORE.emit([&]() { 264f2ec16ccSHideki Saito return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", 265f2ec16ccSHideki Saito L->getStartLoc(), 266f2ec16ccSHideki Saito L->getHeader()) 267f2ec16ccSHideki Saito << "loop not vectorized: cannot prove it is safe to reorder " 268f2ec16ccSHideki Saito "memory operations"; 269f2ec16ccSHideki Saito }); 270d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); 271f2ec16ccSHideki Saito Failed = true; 272f2ec16ccSHideki Saito } 273f2ec16ccSHideki Saito 274f2ec16ccSHideki Saito return Failed; 275f2ec16ccSHideki Saito } 276f2ec16ccSHideki Saito 277f2ec16ccSHideki Saito // Return true if the inner loop \p Lp is uniform with regard to the outer loop 278f2ec16ccSHideki Saito // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes 279f2ec16ccSHideki Saito // executing the inner loop will execute the same iterations). This check is 280f2ec16ccSHideki Saito // very constrained for now but it will be relaxed in the future. \p Lp is 281f2ec16ccSHideki Saito // considered uniform if it meets all the following conditions: 282f2ec16ccSHideki Saito // 1) it has a canonical IV (starting from 0 and with stride 1), 283f2ec16ccSHideki Saito // 2) its latch terminator is a conditional branch and, 284f2ec16ccSHideki Saito // 3) its latch condition is a compare instruction whose operands are the 285f2ec16ccSHideki Saito // canonical IV and an OuterLp invariant. 286f2ec16ccSHideki Saito // This check doesn't take into account the uniformity of other conditions not 287f2ec16ccSHideki Saito // related to the loop latch because they don't affect the loop uniformity. 288f2ec16ccSHideki Saito // 289f2ec16ccSHideki Saito // NOTE: We decided to keep all these checks and its associated documentation 290f2ec16ccSHideki Saito // together so that we can easily have a picture of the current supported loop 291f2ec16ccSHideki Saito // nests. However, some of the current checks don't depend on \p OuterLp and 292f2ec16ccSHideki Saito // would be redundantly executed for each \p Lp if we invoked this function for 293f2ec16ccSHideki Saito // different candidate outer loops. This is not the case for now because we 294f2ec16ccSHideki Saito // don't currently have the infrastructure to evaluate multiple candidate outer 295f2ec16ccSHideki Saito // loops and \p OuterLp will be a fixed parameter while we only support explicit 296f2ec16ccSHideki Saito // outer loop vectorization. It's also very likely that these checks go away 297f2ec16ccSHideki Saito // before introducing the aforementioned infrastructure. However, if this is not 298f2ec16ccSHideki Saito // the case, we should move the \p OuterLp independent checks to a separate 299f2ec16ccSHideki Saito // function that is only executed once for each \p Lp. 300f2ec16ccSHideki Saito static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { 301f2ec16ccSHideki Saito assert(Lp->getLoopLatch() && "Expected loop with a single latch."); 302f2ec16ccSHideki Saito 303f2ec16ccSHideki Saito // If Lp is the outer loop, it's uniform by definition. 304f2ec16ccSHideki Saito if (Lp == OuterLp) 305f2ec16ccSHideki Saito return true; 306f2ec16ccSHideki Saito assert(OuterLp->contains(Lp) && "OuterLp must contain Lp."); 307f2ec16ccSHideki Saito 308f2ec16ccSHideki Saito // 1. 309f2ec16ccSHideki Saito PHINode *IV = Lp->getCanonicalInductionVariable(); 310f2ec16ccSHideki Saito if (!IV) { 311d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n"); 312f2ec16ccSHideki Saito return false; 313f2ec16ccSHideki Saito } 314f2ec16ccSHideki Saito 315f2ec16ccSHideki Saito // 2. 316f2ec16ccSHideki Saito BasicBlock *Latch = Lp->getLoopLatch(); 317f2ec16ccSHideki Saito auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator()); 318f2ec16ccSHideki Saito if (!LatchBr || LatchBr->isUnconditional()) { 319d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n"); 320f2ec16ccSHideki Saito return false; 321f2ec16ccSHideki Saito } 322f2ec16ccSHideki Saito 323f2ec16ccSHideki Saito // 3. 324f2ec16ccSHideki Saito auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition()); 325f2ec16ccSHideki Saito if (!LatchCmp) { 326d34e60caSNicola Zaghen LLVM_DEBUG( 327d34e60caSNicola Zaghen dbgs() << "LV: Loop latch condition is not a compare instruction.\n"); 328f2ec16ccSHideki Saito return false; 329f2ec16ccSHideki Saito } 330f2ec16ccSHideki Saito 331f2ec16ccSHideki Saito Value *CondOp0 = LatchCmp->getOperand(0); 332f2ec16ccSHideki Saito Value *CondOp1 = LatchCmp->getOperand(1); 333f2ec16ccSHideki Saito Value *IVUpdate = IV->getIncomingValueForBlock(Latch); 334f2ec16ccSHideki Saito if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) && 335f2ec16ccSHideki Saito !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) { 336d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n"); 337f2ec16ccSHideki Saito return false; 338f2ec16ccSHideki Saito } 339f2ec16ccSHideki Saito 340f2ec16ccSHideki Saito return true; 341f2ec16ccSHideki Saito } 342f2ec16ccSHideki Saito 343f2ec16ccSHideki Saito // Return true if \p Lp and all its nested loops are uniform with regard to \p 344f2ec16ccSHideki Saito // OuterLp. 345f2ec16ccSHideki Saito static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) { 346f2ec16ccSHideki Saito if (!isUniformLoop(Lp, OuterLp)) 347f2ec16ccSHideki Saito return false; 348f2ec16ccSHideki Saito 349f2ec16ccSHideki Saito // Check if nested loops are uniform. 350f2ec16ccSHideki Saito for (Loop *SubLp : *Lp) 351f2ec16ccSHideki Saito if (!isUniformLoopNest(SubLp, OuterLp)) 352f2ec16ccSHideki Saito return false; 353f2ec16ccSHideki Saito 354f2ec16ccSHideki Saito return true; 355f2ec16ccSHideki Saito } 356f2ec16ccSHideki Saito 3575f8f34e4SAdrian Prantl /// Check whether it is safe to if-convert this phi node. 358f2ec16ccSHideki Saito /// 359f2ec16ccSHideki Saito /// Phi nodes with constant expressions that can trap are not safe to if 360f2ec16ccSHideki Saito /// convert. 361f2ec16ccSHideki Saito static bool canIfConvertPHINodes(BasicBlock *BB) { 362f2ec16ccSHideki Saito for (PHINode &Phi : BB->phis()) { 363f2ec16ccSHideki Saito for (Value *V : Phi.incoming_values()) 364f2ec16ccSHideki Saito if (auto *C = dyn_cast<Constant>(V)) 365f2ec16ccSHideki Saito if (C->canTrap()) 366f2ec16ccSHideki Saito return false; 367f2ec16ccSHideki Saito } 368f2ec16ccSHideki Saito return true; 369f2ec16ccSHideki Saito } 370f2ec16ccSHideki Saito 371f2ec16ccSHideki Saito static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { 372f2ec16ccSHideki Saito if (Ty->isPointerTy()) 373f2ec16ccSHideki Saito return DL.getIntPtrType(Ty); 374f2ec16ccSHideki Saito 375f2ec16ccSHideki Saito // It is possible that char's or short's overflow when we ask for the loop's 376f2ec16ccSHideki Saito // trip count, work around this by changing the type size. 377f2ec16ccSHideki Saito if (Ty->getScalarSizeInBits() < 32) 378f2ec16ccSHideki Saito return Type::getInt32Ty(Ty->getContext()); 379f2ec16ccSHideki Saito 380f2ec16ccSHideki Saito return Ty; 381f2ec16ccSHideki Saito } 382f2ec16ccSHideki Saito 383f2ec16ccSHideki Saito static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { 384f2ec16ccSHideki Saito Ty0 = convertPointerToIntegerType(DL, Ty0); 385f2ec16ccSHideki Saito Ty1 = convertPointerToIntegerType(DL, Ty1); 386f2ec16ccSHideki Saito if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) 387f2ec16ccSHideki Saito return Ty0; 388f2ec16ccSHideki Saito return Ty1; 389f2ec16ccSHideki Saito } 390f2ec16ccSHideki Saito 3915f8f34e4SAdrian Prantl /// Check that the instruction has outside loop users and is not an 392f2ec16ccSHideki Saito /// identified reduction variable. 393f2ec16ccSHideki Saito static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, 394f2ec16ccSHideki Saito SmallPtrSetImpl<Value *> &AllowedExit) { 39560a1e4ddSAnna Thomas // Reductions, Inductions and non-header phis are allowed to have exit users. All 396f2ec16ccSHideki Saito // other instructions must not have external users. 397f2ec16ccSHideki Saito if (!AllowedExit.count(Inst)) 398f2ec16ccSHideki Saito // Check that all of the users of the loop are inside the BB. 399f2ec16ccSHideki Saito for (User *U : Inst->users()) { 400f2ec16ccSHideki Saito Instruction *UI = cast<Instruction>(U); 401f2ec16ccSHideki Saito // This user may be a reduction exit value. 402f2ec16ccSHideki Saito if (!TheLoop->contains(UI)) { 403d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); 404f2ec16ccSHideki Saito return true; 405f2ec16ccSHideki Saito } 406f2ec16ccSHideki Saito } 407f2ec16ccSHideki Saito return false; 408f2ec16ccSHideki Saito } 409f2ec16ccSHideki Saito 410f2ec16ccSHideki Saito int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { 411f2ec16ccSHideki Saito const ValueToValueMap &Strides = 412f2ec16ccSHideki Saito getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); 413f2ec16ccSHideki Saito 414d1170dbeSSjoerd Meijer bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize(); 415d1170dbeSSjoerd Meijer int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false); 416f2ec16ccSHideki Saito if (Stride == 1 || Stride == -1) 417f2ec16ccSHideki Saito return Stride; 418f2ec16ccSHideki Saito return 0; 419f2ec16ccSHideki Saito } 420f2ec16ccSHideki Saito 421f2ec16ccSHideki Saito bool LoopVectorizationLegality::isUniform(Value *V) { 422f2ec16ccSHideki Saito return LAI->isUniform(V); 423f2ec16ccSHideki Saito } 424f2ec16ccSHideki Saito 425f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeOuterLoop() { 426f2ec16ccSHideki Saito assert(!TheLoop->empty() && "We are not vectorizing an outer loop."); 427f2ec16ccSHideki Saito // Store the result and return it at the end instead of exiting early, in case 428f2ec16ccSHideki Saito // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 429f2ec16ccSHideki Saito bool Result = true; 430f2ec16ccSHideki Saito bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 431f2ec16ccSHideki Saito 432f2ec16ccSHideki Saito for (BasicBlock *BB : TheLoop->blocks()) { 433f2ec16ccSHideki Saito // Check whether the BB terminator is a BranchInst. Any other terminator is 434f2ec16ccSHideki Saito // not supported yet. 435f2ec16ccSHideki Saito auto *Br = dyn_cast<BranchInst>(BB->getTerminator()); 436f2ec16ccSHideki Saito if (!Br) { 4379e97caf5SRenato Golin reportVectorizationFailure("Unsupported basic block terminator", 4389e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 439ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 440f2ec16ccSHideki Saito if (DoExtraAnalysis) 441f2ec16ccSHideki Saito Result = false; 442f2ec16ccSHideki Saito else 443f2ec16ccSHideki Saito return false; 444f2ec16ccSHideki Saito } 445f2ec16ccSHideki Saito 446f2ec16ccSHideki Saito // Check whether the BranchInst is a supported one. Only unconditional 447f2ec16ccSHideki Saito // branches, conditional branches with an outer loop invariant condition or 448f2ec16ccSHideki Saito // backedges are supported. 4494e4ecae0SHideki Saito // FIXME: We skip these checks when VPlan predication is enabled as we 4504e4ecae0SHideki Saito // want to allow divergent branches. This whole check will be removed 4514e4ecae0SHideki Saito // once VPlan predication is on by default. 4524e4ecae0SHideki Saito if (!EnableVPlanPredication && Br && Br->isConditional() && 453f2ec16ccSHideki Saito !TheLoop->isLoopInvariant(Br->getCondition()) && 454f2ec16ccSHideki Saito !LI->isLoopHeader(Br->getSuccessor(0)) && 455f2ec16ccSHideki Saito !LI->isLoopHeader(Br->getSuccessor(1))) { 4569e97caf5SRenato Golin reportVectorizationFailure("Unsupported conditional branch", 4579e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 458ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 459f2ec16ccSHideki Saito if (DoExtraAnalysis) 460f2ec16ccSHideki Saito Result = false; 461f2ec16ccSHideki Saito else 462f2ec16ccSHideki Saito return false; 463f2ec16ccSHideki Saito } 464f2ec16ccSHideki Saito } 465f2ec16ccSHideki Saito 466f2ec16ccSHideki Saito // Check whether inner loops are uniform. At this point, we only support 467f2ec16ccSHideki Saito // simple outer loops scenarios with uniform nested loops. 468f2ec16ccSHideki Saito if (!isUniformLoopNest(TheLoop /*loop nest*/, 469f2ec16ccSHideki Saito TheLoop /*context outer loop*/)) { 4709e97caf5SRenato Golin reportVectorizationFailure("Outer loop contains divergent loops", 4719e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 472ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 473f2ec16ccSHideki Saito if (DoExtraAnalysis) 474f2ec16ccSHideki Saito Result = false; 475f2ec16ccSHideki Saito else 476f2ec16ccSHideki Saito return false; 477f2ec16ccSHideki Saito } 478f2ec16ccSHideki Saito 479ea7f3035SHideki Saito // Check whether we are able to set up outer loop induction. 480ea7f3035SHideki Saito if (!setupOuterLoopInductions()) { 4819e97caf5SRenato Golin reportVectorizationFailure("Unsupported outer loop Phi(s)", 4829e97caf5SRenato Golin "Unsupported outer loop Phi(s)", 483ec818d7fSHideki Saito "UnsupportedPhi", ORE, TheLoop); 484ea7f3035SHideki Saito if (DoExtraAnalysis) 485ea7f3035SHideki Saito Result = false; 486ea7f3035SHideki Saito else 487ea7f3035SHideki Saito return false; 488ea7f3035SHideki Saito } 489ea7f3035SHideki Saito 490f2ec16ccSHideki Saito return Result; 491f2ec16ccSHideki Saito } 492f2ec16ccSHideki Saito 493f2ec16ccSHideki Saito void LoopVectorizationLegality::addInductionPhi( 494f2ec16ccSHideki Saito PHINode *Phi, const InductionDescriptor &ID, 495f2ec16ccSHideki Saito SmallPtrSetImpl<Value *> &AllowedExit) { 496f2ec16ccSHideki Saito Inductions[Phi] = ID; 497f2ec16ccSHideki Saito 498f2ec16ccSHideki Saito // In case this induction also comes with casts that we know we can ignore 499f2ec16ccSHideki Saito // in the vectorized loop body, record them here. All casts could be recorded 500f2ec16ccSHideki Saito // here for ignoring, but suffices to record only the first (as it is the 501f2ec16ccSHideki Saito // only one that may bw used outside the cast sequence). 502f2ec16ccSHideki Saito const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts(); 503f2ec16ccSHideki Saito if (!Casts.empty()) 504f2ec16ccSHideki Saito InductionCastsToIgnore.insert(*Casts.begin()); 505f2ec16ccSHideki Saito 506f2ec16ccSHideki Saito Type *PhiTy = Phi->getType(); 507f2ec16ccSHideki Saito const DataLayout &DL = Phi->getModule()->getDataLayout(); 508f2ec16ccSHideki Saito 509f2ec16ccSHideki Saito // Get the widest type. 510f2ec16ccSHideki Saito if (!PhiTy->isFloatingPointTy()) { 511f2ec16ccSHideki Saito if (!WidestIndTy) 512f2ec16ccSHideki Saito WidestIndTy = convertPointerToIntegerType(DL, PhiTy); 513f2ec16ccSHideki Saito else 514f2ec16ccSHideki Saito WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); 515f2ec16ccSHideki Saito } 516f2ec16ccSHideki Saito 517f2ec16ccSHideki Saito // Int inductions are special because we only allow one IV. 518f2ec16ccSHideki Saito if (ID.getKind() == InductionDescriptor::IK_IntInduction && 519f2ec16ccSHideki Saito ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && 520f2ec16ccSHideki Saito isa<Constant>(ID.getStartValue()) && 521f2ec16ccSHideki Saito cast<Constant>(ID.getStartValue())->isNullValue()) { 522f2ec16ccSHideki Saito 523f2ec16ccSHideki Saito // Use the phi node with the widest type as induction. Use the last 524f2ec16ccSHideki Saito // one if there are multiple (no good reason for doing this other 525f2ec16ccSHideki Saito // than it is expedient). We've checked that it begins at zero and 526f2ec16ccSHideki Saito // steps by one, so this is a canonical induction variable. 527f2ec16ccSHideki Saito if (!PrimaryInduction || PhiTy == WidestIndTy) 528f2ec16ccSHideki Saito PrimaryInduction = Phi; 529f2ec16ccSHideki Saito } 530f2ec16ccSHideki Saito 531f2ec16ccSHideki Saito // Both the PHI node itself, and the "post-increment" value feeding 532f2ec16ccSHideki Saito // back into the PHI node may have external users. 533f2ec16ccSHideki Saito // We can allow those uses, except if the SCEVs we have for them rely 534f2ec16ccSHideki Saito // on predicates that only hold within the loop, since allowing the exit 5356a1dd77fSAnna Thomas // currently means re-using this SCEV outside the loop (see PR33706 for more 5366a1dd77fSAnna Thomas // details). 537f2ec16ccSHideki Saito if (PSE.getUnionPredicate().isAlwaysTrue()) { 538f2ec16ccSHideki Saito AllowedExit.insert(Phi); 539f2ec16ccSHideki Saito AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); 540f2ec16ccSHideki Saito } 541f2ec16ccSHideki Saito 542d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n"); 543f2ec16ccSHideki Saito } 544f2ec16ccSHideki Saito 545ea7f3035SHideki Saito bool LoopVectorizationLegality::setupOuterLoopInductions() { 546ea7f3035SHideki Saito BasicBlock *Header = TheLoop->getHeader(); 547ea7f3035SHideki Saito 548ea7f3035SHideki Saito // Returns true if a given Phi is a supported induction. 549ea7f3035SHideki Saito auto isSupportedPhi = [&](PHINode &Phi) -> bool { 550ea7f3035SHideki Saito InductionDescriptor ID; 551ea7f3035SHideki Saito if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) && 552ea7f3035SHideki Saito ID.getKind() == InductionDescriptor::IK_IntInduction) { 553ea7f3035SHideki Saito addInductionPhi(&Phi, ID, AllowedExit); 554ea7f3035SHideki Saito return true; 555ea7f3035SHideki Saito } else { 556ea7f3035SHideki Saito // Bail out for any Phi in the outer loop header that is not a supported 557ea7f3035SHideki Saito // induction. 558ea7f3035SHideki Saito LLVM_DEBUG( 559ea7f3035SHideki Saito dbgs() 560ea7f3035SHideki Saito << "LV: Found unsupported PHI for outer loop vectorization.\n"); 561ea7f3035SHideki Saito return false; 562ea7f3035SHideki Saito } 563ea7f3035SHideki Saito }; 564ea7f3035SHideki Saito 565ea7f3035SHideki Saito if (llvm::all_of(Header->phis(), isSupportedPhi)) 566ea7f3035SHideki Saito return true; 567ea7f3035SHideki Saito else 568ea7f3035SHideki Saito return false; 569ea7f3035SHideki Saito } 570ea7f3035SHideki Saito 57166c120f0SFrancesco Petrogalli /// Checks if a function is scalarizable according to the TLI, in 57266c120f0SFrancesco Petrogalli /// the sense that it should be vectorized and then expanded in 57366c120f0SFrancesco Petrogalli /// multiple scalarcalls. This is represented in the 57466c120f0SFrancesco Petrogalli /// TLI via mappings that do not specify a vector name, as in the 57566c120f0SFrancesco Petrogalli /// following example: 57666c120f0SFrancesco Petrogalli /// 57766c120f0SFrancesco Petrogalli /// const VecDesc VecIntrinsics[] = { 57866c120f0SFrancesco Petrogalli /// {"llvm.phx.abs.i32", "", 4} 57966c120f0SFrancesco Petrogalli /// }; 58066c120f0SFrancesco Petrogalli static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) { 58166c120f0SFrancesco Petrogalli const StringRef ScalarName = CI.getCalledFunction()->getName(); 58266c120f0SFrancesco Petrogalli bool Scalarize = TLI.isFunctionVectorizable(ScalarName); 58366c120f0SFrancesco Petrogalli // Check that all known VFs are not associated to a vector 58466c120f0SFrancesco Petrogalli // function, i.e. the vector name is emty. 58566c120f0SFrancesco Petrogalli if (Scalarize) 58666c120f0SFrancesco Petrogalli for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName); 58766c120f0SFrancesco Petrogalli VF <= WidestVF; VF *= 2) { 58866c120f0SFrancesco Petrogalli Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF); 58966c120f0SFrancesco Petrogalli } 59066c120f0SFrancesco Petrogalli return Scalarize; 59166c120f0SFrancesco Petrogalli } 59266c120f0SFrancesco Petrogalli 593f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeInstrs() { 594f2ec16ccSHideki Saito BasicBlock *Header = TheLoop->getHeader(); 595f2ec16ccSHideki Saito 596f2ec16ccSHideki Saito // Look for the attribute signaling the absence of NaNs. 597f2ec16ccSHideki Saito Function &F = *Header->getParent(); 598f2ec16ccSHideki Saito HasFunNoNaNAttr = 599f2ec16ccSHideki Saito F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 600f2ec16ccSHideki Saito 601f2ec16ccSHideki Saito // For each block in the loop. 602f2ec16ccSHideki Saito for (BasicBlock *BB : TheLoop->blocks()) { 603f2ec16ccSHideki Saito // Scan the instructions in the block and look for hazards. 604f2ec16ccSHideki Saito for (Instruction &I : *BB) { 605f2ec16ccSHideki Saito if (auto *Phi = dyn_cast<PHINode>(&I)) { 606f2ec16ccSHideki Saito Type *PhiTy = Phi->getType(); 607f2ec16ccSHideki Saito // Check that this PHI type is allowed. 608f2ec16ccSHideki Saito if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && 609f2ec16ccSHideki Saito !PhiTy->isPointerTy()) { 6109e97caf5SRenato Golin reportVectorizationFailure("Found a non-int non-pointer PHI", 6119e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 612ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 613f2ec16ccSHideki Saito return false; 614f2ec16ccSHideki Saito } 615f2ec16ccSHideki Saito 616f2ec16ccSHideki Saito // If this PHINode is not in the header block, then we know that we 617f2ec16ccSHideki Saito // can convert it to select during if-conversion. No need to check if 618f2ec16ccSHideki Saito // the PHIs in this block are induction or reduction variables. 619f2ec16ccSHideki Saito if (BB != Header) { 62060a1e4ddSAnna Thomas // Non-header phi nodes that have outside uses can be vectorized. Add 62160a1e4ddSAnna Thomas // them to the list of allowed exits. 62260a1e4ddSAnna Thomas // Unsafe cyclic dependencies with header phis are identified during 62360a1e4ddSAnna Thomas // legalization for reduction, induction and first order 62460a1e4ddSAnna Thomas // recurrences. 625dd18ce45SBjorn Pettersson AllowedExit.insert(&I); 626f2ec16ccSHideki Saito continue; 627f2ec16ccSHideki Saito } 628f2ec16ccSHideki Saito 629f2ec16ccSHideki Saito // We only allow if-converted PHIs with exactly two incoming values. 630f2ec16ccSHideki Saito if (Phi->getNumIncomingValues() != 2) { 6319e97caf5SRenato Golin reportVectorizationFailure("Found an invalid PHI", 6329e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 633ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop, Phi); 634f2ec16ccSHideki Saito return false; 635f2ec16ccSHideki Saito } 636f2ec16ccSHideki Saito 637f2ec16ccSHideki Saito RecurrenceDescriptor RedDes; 638f2ec16ccSHideki Saito if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, 639f2ec16ccSHideki Saito DT)) { 640f2ec16ccSHideki Saito if (RedDes.hasUnsafeAlgebra()) 641f2ec16ccSHideki Saito Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); 642f2ec16ccSHideki Saito AllowedExit.insert(RedDes.getLoopExitInstr()); 643f2ec16ccSHideki Saito Reductions[Phi] = RedDes; 644f2ec16ccSHideki Saito continue; 645f2ec16ccSHideki Saito } 646f2ec16ccSHideki Saito 647b02b0ad8SAnna Thomas // TODO: Instead of recording the AllowedExit, it would be good to record the 648b02b0ad8SAnna Thomas // complementary set: NotAllowedExit. These include (but may not be 649b02b0ad8SAnna Thomas // limited to): 650b02b0ad8SAnna Thomas // 1. Reduction phis as they represent the one-before-last value, which 651b02b0ad8SAnna Thomas // is not available when vectorized 652b02b0ad8SAnna Thomas // 2. Induction phis and increment when SCEV predicates cannot be used 653b02b0ad8SAnna Thomas // outside the loop - see addInductionPhi 654b02b0ad8SAnna Thomas // 3. Non-Phis with outside uses when SCEV predicates cannot be used 655b02b0ad8SAnna Thomas // outside the loop - see call to hasOutsideLoopUser in the non-phi 656b02b0ad8SAnna Thomas // handling below 657b02b0ad8SAnna Thomas // 4. FirstOrderRecurrence phis that can possibly be handled by 658b02b0ad8SAnna Thomas // extraction. 659b02b0ad8SAnna Thomas // By recording these, we can then reason about ways to vectorize each 660b02b0ad8SAnna Thomas // of these NotAllowedExit. 661f2ec16ccSHideki Saito InductionDescriptor ID; 662f2ec16ccSHideki Saito if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { 663f2ec16ccSHideki Saito addInductionPhi(Phi, ID, AllowedExit); 664f2ec16ccSHideki Saito if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) 665f2ec16ccSHideki Saito Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); 666f2ec16ccSHideki Saito continue; 667f2ec16ccSHideki Saito } 668f2ec16ccSHideki Saito 669f2ec16ccSHideki Saito if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, 670f2ec16ccSHideki Saito SinkAfter, DT)) { 671f2ec16ccSHideki Saito FirstOrderRecurrences.insert(Phi); 672f2ec16ccSHideki Saito continue; 673f2ec16ccSHideki Saito } 674f2ec16ccSHideki Saito 675f2ec16ccSHideki Saito // As a last resort, coerce the PHI to a AddRec expression 676f2ec16ccSHideki Saito // and re-try classifying it a an induction PHI. 677f2ec16ccSHideki Saito if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { 678f2ec16ccSHideki Saito addInductionPhi(Phi, ID, AllowedExit); 679f2ec16ccSHideki Saito continue; 680f2ec16ccSHideki Saito } 681f2ec16ccSHideki Saito 6829e97caf5SRenato Golin reportVectorizationFailure("Found an unidentified PHI", 6839e97caf5SRenato Golin "value that could not be identified as " 6849e97caf5SRenato Golin "reduction is used outside the loop", 685ec818d7fSHideki Saito "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); 686f2ec16ccSHideki Saito return false; 687f2ec16ccSHideki Saito } // end of PHI handling 688f2ec16ccSHideki Saito 689f2ec16ccSHideki Saito // We handle calls that: 690f2ec16ccSHideki Saito // * Are debug info intrinsics. 691f2ec16ccSHideki Saito // * Have a mapping to an IR intrinsic. 692f2ec16ccSHideki Saito // * Have a vector version available. 693f2ec16ccSHideki Saito auto *CI = dyn_cast<CallInst>(&I); 69466c120f0SFrancesco Petrogalli 695f2ec16ccSHideki Saito if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && 696f2ec16ccSHideki Saito !isa<DbgInfoIntrinsic>(CI) && 697f2ec16ccSHideki Saito !(CI->getCalledFunction() && TLI && 69866c120f0SFrancesco Petrogalli (!VFDatabase::getMappings(*CI).empty() || 69966c120f0SFrancesco Petrogalli isTLIScalarize(*TLI, *CI)))) { 7007d65fe5cSSanjay Patel // If the call is a recognized math libary call, it is likely that 7017d65fe5cSSanjay Patel // we can vectorize it given loosened floating-point constraints. 7027d65fe5cSSanjay Patel LibFunc Func; 7037d65fe5cSSanjay Patel bool IsMathLibCall = 7047d65fe5cSSanjay Patel TLI && CI->getCalledFunction() && 7057d65fe5cSSanjay Patel CI->getType()->isFloatingPointTy() && 7067d65fe5cSSanjay Patel TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && 7077d65fe5cSSanjay Patel TLI->hasOptimizedCodeGen(Func); 7087d65fe5cSSanjay Patel 7097d65fe5cSSanjay Patel if (IsMathLibCall) { 7107d65fe5cSSanjay Patel // TODO: Ideally, we should not use clang-specific language here, 7117d65fe5cSSanjay Patel // but it's hard to provide meaningful yet generic advice. 7127d65fe5cSSanjay Patel // Also, should this be guarded by allowExtraAnalysis() and/or be part 7137d65fe5cSSanjay Patel // of the returned info from isFunctionVectorizable()? 71466c120f0SFrancesco Petrogalli reportVectorizationFailure( 71566c120f0SFrancesco Petrogalli "Found a non-intrinsic callsite", 7169e97caf5SRenato Golin "library call cannot be vectorized. " 7177d65fe5cSSanjay Patel "Try compiling with -fno-math-errno, -ffast-math, " 7189e97caf5SRenato Golin "or similar flags", 719ec818d7fSHideki Saito "CantVectorizeLibcall", ORE, TheLoop, CI); 7207d65fe5cSSanjay Patel } else { 7219e97caf5SRenato Golin reportVectorizationFailure("Found a non-intrinsic callsite", 7229e97caf5SRenato Golin "call instruction cannot be vectorized", 723ec818d7fSHideki Saito "CantVectorizeLibcall", ORE, TheLoop, CI); 7247d65fe5cSSanjay Patel } 725f2ec16ccSHideki Saito return false; 726f2ec16ccSHideki Saito } 727f2ec16ccSHideki Saito 728a066f1f9SSimon Pilgrim // Some intrinsics have scalar arguments and should be same in order for 729a066f1f9SSimon Pilgrim // them to be vectorized (i.e. loop invariant). 730a066f1f9SSimon Pilgrim if (CI) { 731f2ec16ccSHideki Saito auto *SE = PSE.getSE(); 732a066f1f9SSimon Pilgrim Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI); 733a066f1f9SSimon Pilgrim for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) 734a066f1f9SSimon Pilgrim if (hasVectorInstrinsicScalarOpd(IntrinID, i)) { 735a066f1f9SSimon Pilgrim if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) { 7369e97caf5SRenato Golin reportVectorizationFailure("Found unvectorizable intrinsic", 7379e97caf5SRenato Golin "intrinsic instruction cannot be vectorized", 738ec818d7fSHideki Saito "CantVectorizeIntrinsic", ORE, TheLoop, CI); 739f2ec16ccSHideki Saito return false; 740f2ec16ccSHideki Saito } 741f2ec16ccSHideki Saito } 742a066f1f9SSimon Pilgrim } 743f2ec16ccSHideki Saito 744f2ec16ccSHideki Saito // Check that the instruction return type is vectorizable. 745f2ec16ccSHideki Saito // Also, we can't vectorize extractelement instructions. 746f2ec16ccSHideki Saito if ((!VectorType::isValidElementType(I.getType()) && 747f2ec16ccSHideki Saito !I.getType()->isVoidTy()) || 748f2ec16ccSHideki Saito isa<ExtractElementInst>(I)) { 7499e97caf5SRenato Golin reportVectorizationFailure("Found unvectorizable type", 7509e97caf5SRenato Golin "instruction return type cannot be vectorized", 751ec818d7fSHideki Saito "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); 752f2ec16ccSHideki Saito return false; 753f2ec16ccSHideki Saito } 754f2ec16ccSHideki Saito 755f2ec16ccSHideki Saito // Check that the stored type is vectorizable. 756f2ec16ccSHideki Saito if (auto *ST = dyn_cast<StoreInst>(&I)) { 757f2ec16ccSHideki Saito Type *T = ST->getValueOperand()->getType(); 758f2ec16ccSHideki Saito if (!VectorType::isValidElementType(T)) { 7599e97caf5SRenato Golin reportVectorizationFailure("Store instruction cannot be vectorized", 7609e97caf5SRenato Golin "store instruction cannot be vectorized", 761ec818d7fSHideki Saito "CantVectorizeStore", ORE, TheLoop, ST); 762f2ec16ccSHideki Saito return false; 763f2ec16ccSHideki Saito } 764f2ec16ccSHideki Saito 7656452bdd2SWarren Ristow // For nontemporal stores, check that a nontemporal vector version is 7666452bdd2SWarren Ristow // supported on the target. 7676452bdd2SWarren Ristow if (ST->getMetadata(LLVMContext::MD_nontemporal)) { 7686452bdd2SWarren Ristow // Arbitrarily try a vector of 2 elements. 7696452bdd2SWarren Ristow Type *VecTy = VectorType::get(T, /*NumElements=*/2); 7706452bdd2SWarren Ristow assert(VecTy && "did not find vectorized version of stored type"); 7715e1e83eeSGuillaume Chatelet const MaybeAlign Alignment = getLoadStoreAlignment(ST); 77233671cefSGuillaume Chatelet assert(Alignment && "Alignment should be set"); 7735e1e83eeSGuillaume Chatelet if (!TTI->isLegalNTStore(VecTy, *Alignment)) { 7746452bdd2SWarren Ristow reportVectorizationFailure( 7756452bdd2SWarren Ristow "nontemporal store instruction cannot be vectorized", 7766452bdd2SWarren Ristow "nontemporal store instruction cannot be vectorized", 777ec818d7fSHideki Saito "CantVectorizeNontemporalStore", ORE, TheLoop, ST); 7786452bdd2SWarren Ristow return false; 7796452bdd2SWarren Ristow } 7806452bdd2SWarren Ristow } 7816452bdd2SWarren Ristow 7826452bdd2SWarren Ristow } else if (auto *LD = dyn_cast<LoadInst>(&I)) { 7836452bdd2SWarren Ristow if (LD->getMetadata(LLVMContext::MD_nontemporal)) { 7846452bdd2SWarren Ristow // For nontemporal loads, check that a nontemporal vector version is 7856452bdd2SWarren Ristow // supported on the target (arbitrarily try a vector of 2 elements). 7866452bdd2SWarren Ristow Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); 7876452bdd2SWarren Ristow assert(VecTy && "did not find vectorized version of load type"); 7885e1e83eeSGuillaume Chatelet const MaybeAlign Alignment = getLoadStoreAlignment(LD); 78933671cefSGuillaume Chatelet assert(Alignment && "Alignment should be set"); 7905e1e83eeSGuillaume Chatelet if (!TTI->isLegalNTLoad(VecTy, *Alignment)) { 7916452bdd2SWarren Ristow reportVectorizationFailure( 7926452bdd2SWarren Ristow "nontemporal load instruction cannot be vectorized", 7936452bdd2SWarren Ristow "nontemporal load instruction cannot be vectorized", 794ec818d7fSHideki Saito "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); 7956452bdd2SWarren Ristow return false; 7966452bdd2SWarren Ristow } 7976452bdd2SWarren Ristow } 7986452bdd2SWarren Ristow 799f2ec16ccSHideki Saito // FP instructions can allow unsafe algebra, thus vectorizable by 800f2ec16ccSHideki Saito // non-IEEE-754 compliant SIMD units. 801f2ec16ccSHideki Saito // This applies to floating-point math operations and calls, not memory 802f2ec16ccSHideki Saito // operations, shuffles, or casts, as they don't change precision or 803f2ec16ccSHideki Saito // semantics. 804f2ec16ccSHideki Saito } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && 805f2ec16ccSHideki Saito !I.isFast()) { 806d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); 807f2ec16ccSHideki Saito Hints->setPotentiallyUnsafe(); 808f2ec16ccSHideki Saito } 809f2ec16ccSHideki Saito 810f2ec16ccSHideki Saito // Reduction instructions are allowed to have exit users. 811f2ec16ccSHideki Saito // All other instructions must not have external users. 812f2ec16ccSHideki Saito if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { 813b02b0ad8SAnna Thomas // We can safely vectorize loops where instructions within the loop are 814b02b0ad8SAnna Thomas // used outside the loop only if the SCEV predicates within the loop is 815b02b0ad8SAnna Thomas // same as outside the loop. Allowing the exit means reusing the SCEV 816b02b0ad8SAnna Thomas // outside the loop. 817b02b0ad8SAnna Thomas if (PSE.getUnionPredicate().isAlwaysTrue()) { 818b02b0ad8SAnna Thomas AllowedExit.insert(&I); 819b02b0ad8SAnna Thomas continue; 820b02b0ad8SAnna Thomas } 8219e97caf5SRenato Golin reportVectorizationFailure("Value cannot be used outside the loop", 8229e97caf5SRenato Golin "value cannot be used outside the loop", 823ec818d7fSHideki Saito "ValueUsedOutsideLoop", ORE, TheLoop, &I); 824f2ec16ccSHideki Saito return false; 825f2ec16ccSHideki Saito } 826f2ec16ccSHideki Saito } // next instr. 827f2ec16ccSHideki Saito } 828f2ec16ccSHideki Saito 829f2ec16ccSHideki Saito if (!PrimaryInduction) { 830f2ec16ccSHideki Saito if (Inductions.empty()) { 8319e97caf5SRenato Golin reportVectorizationFailure("Did not find one integer induction var", 8329e97caf5SRenato Golin "loop induction variable could not be identified", 833ec818d7fSHideki Saito "NoInductionVariable", ORE, TheLoop); 834f2ec16ccSHideki Saito return false; 8354f27730eSWarren Ristow } else if (!WidestIndTy) { 8369e97caf5SRenato Golin reportVectorizationFailure("Did not find one integer induction var", 8379e97caf5SRenato Golin "integer loop induction variable could not be identified", 838ec818d7fSHideki Saito "NoIntegerInductionVariable", ORE, TheLoop); 8394f27730eSWarren Ristow return false; 8409e97caf5SRenato Golin } else { 8419e97caf5SRenato Golin LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); 842f2ec16ccSHideki Saito } 843f2ec16ccSHideki Saito } 844f2ec16ccSHideki Saito 8459d24933fSFlorian Hahn // For first order recurrences, we use the previous value (incoming value from 8469d24933fSFlorian Hahn // the latch) to check if it dominates all users of the recurrence. Bail out 8479d24933fSFlorian Hahn // if we have to sink such an instruction for another recurrence, as the 8489d24933fSFlorian Hahn // dominance requirement may not hold after sinking. 8499d24933fSFlorian Hahn BasicBlock *LoopLatch = TheLoop->getLoopLatch(); 8509d24933fSFlorian Hahn if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) { 8519d24933fSFlorian Hahn Instruction *V = 8529d24933fSFlorian Hahn cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch)); 8539d24933fSFlorian Hahn return SinkAfter.find(V) != SinkAfter.end(); 8549d24933fSFlorian Hahn })) 8559d24933fSFlorian Hahn return false; 8569d24933fSFlorian Hahn 857f2ec16ccSHideki Saito // Now we know the widest induction type, check if our found induction 858f2ec16ccSHideki Saito // is the same size. If it's not, unset it here and InnerLoopVectorizer 859f2ec16ccSHideki Saito // will create another. 860f2ec16ccSHideki Saito if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) 861f2ec16ccSHideki Saito PrimaryInduction = nullptr; 862f2ec16ccSHideki Saito 863f2ec16ccSHideki Saito return true; 864f2ec16ccSHideki Saito } 865f2ec16ccSHideki Saito 866f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeMemory() { 867f2ec16ccSHideki Saito LAI = &(*GetLAA)(*TheLoop); 868f2ec16ccSHideki Saito const OptimizationRemarkAnalysis *LAR = LAI->getReport(); 869f2ec16ccSHideki Saito if (LAR) { 870f2ec16ccSHideki Saito ORE->emit([&]() { 871f2ec16ccSHideki Saito return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(), 872f2ec16ccSHideki Saito "loop not vectorized: ", *LAR); 873f2ec16ccSHideki Saito }); 874f2ec16ccSHideki Saito } 875f2ec16ccSHideki Saito if (!LAI->canVectorizeMemory()) 876f2ec16ccSHideki Saito return false; 877f2ec16ccSHideki Saito 8785e9215f0SAnna Thomas if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { 8799e97caf5SRenato Golin reportVectorizationFailure("Stores to a uniform address", 8809e97caf5SRenato Golin "write to a loop invariant address could not be vectorized", 881ec818d7fSHideki Saito "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); 882f2ec16ccSHideki Saito return false; 883f2ec16ccSHideki Saito } 884f2ec16ccSHideki Saito Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); 885f2ec16ccSHideki Saito PSE.addPredicate(LAI->getPSE().getUnionPredicate()); 886f2ec16ccSHideki Saito 887f2ec16ccSHideki Saito return true; 888f2ec16ccSHideki Saito } 889f2ec16ccSHideki Saito 890f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionPhi(const Value *V) { 891f2ec16ccSHideki Saito Value *In0 = const_cast<Value *>(V); 892f2ec16ccSHideki Saito PHINode *PN = dyn_cast_or_null<PHINode>(In0); 893f2ec16ccSHideki Saito if (!PN) 894f2ec16ccSHideki Saito return false; 895f2ec16ccSHideki Saito 896f2ec16ccSHideki Saito return Inductions.count(PN); 897f2ec16ccSHideki Saito } 898f2ec16ccSHideki Saito 899f2ec16ccSHideki Saito bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { 900f2ec16ccSHideki Saito auto *Inst = dyn_cast<Instruction>(V); 901f2ec16ccSHideki Saito return (Inst && InductionCastsToIgnore.count(Inst)); 902f2ec16ccSHideki Saito } 903f2ec16ccSHideki Saito 904f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionVariable(const Value *V) { 905f2ec16ccSHideki Saito return isInductionPhi(V) || isCastedInductionVariable(V); 906f2ec16ccSHideki Saito } 907f2ec16ccSHideki Saito 908f2ec16ccSHideki Saito bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { 909f2ec16ccSHideki Saito return FirstOrderRecurrences.count(Phi); 910f2ec16ccSHideki Saito } 911f2ec16ccSHideki Saito 912f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { 913f2ec16ccSHideki Saito return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); 914f2ec16ccSHideki Saito } 915f2ec16ccSHideki Saito 916f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockCanBePredicated( 917d57d73daSDorit Nuzman BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) { 918f2ec16ccSHideki Saito const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); 919f2ec16ccSHideki Saito 920f2ec16ccSHideki Saito for (Instruction &I : *BB) { 921f2ec16ccSHideki Saito // Check that we don't have a constant expression that can trap as operand. 922f2ec16ccSHideki Saito for (Value *Operand : I.operands()) { 923f2ec16ccSHideki Saito if (auto *C = dyn_cast<Constant>(Operand)) 924f2ec16ccSHideki Saito if (C->canTrap()) 925f2ec16ccSHideki Saito return false; 926f2ec16ccSHideki Saito } 92723c11380SFlorian Hahn 92823c11380SFlorian Hahn // We can predicate blocks with calls to assume, as long as we drop them in 92923c11380SFlorian Hahn // case we flatten the CFG via predication. 93023c11380SFlorian Hahn if (match(&I, m_Intrinsic<Intrinsic::assume>())) { 93123c11380SFlorian Hahn ConditionalAssumes.insert(&I); 93223c11380SFlorian Hahn continue; 93323c11380SFlorian Hahn } 93423c11380SFlorian Hahn 935f2ec16ccSHideki Saito // We might be able to hoist the load. 936f2ec16ccSHideki Saito if (I.mayReadFromMemory()) { 937f2ec16ccSHideki Saito auto *LI = dyn_cast<LoadInst>(&I); 938f2ec16ccSHideki Saito if (!LI) 939f2ec16ccSHideki Saito return false; 940f2ec16ccSHideki Saito if (!SafePtrs.count(LI->getPointerOperand())) { 941f2ec16ccSHideki Saito // !llvm.mem.parallel_loop_access implies if-conversion safety. 942f2ec16ccSHideki Saito // Otherwise, record that the load needs (real or emulated) masking 943f2ec16ccSHideki Saito // and let the cost model decide. 944d57d73daSDorit Nuzman if (!IsAnnotatedParallel || PreserveGuards) 945f2ec16ccSHideki Saito MaskedOp.insert(LI); 946f2ec16ccSHideki Saito continue; 947f2ec16ccSHideki Saito } 948f2ec16ccSHideki Saito } 949f2ec16ccSHideki Saito 950f2ec16ccSHideki Saito if (I.mayWriteToMemory()) { 951f2ec16ccSHideki Saito auto *SI = dyn_cast<StoreInst>(&I); 952f2ec16ccSHideki Saito if (!SI) 953f2ec16ccSHideki Saito return false; 954f2ec16ccSHideki Saito // Predicated store requires some form of masking: 955f2ec16ccSHideki Saito // 1) masked store HW instruction, 956f2ec16ccSHideki Saito // 2) emulation via load-blend-store (only if safe and legal to do so, 957f2ec16ccSHideki Saito // be aware on the race conditions), or 958f2ec16ccSHideki Saito // 3) element-by-element predicate check and scalar store. 959f2ec16ccSHideki Saito MaskedOp.insert(SI); 960f2ec16ccSHideki Saito continue; 961f2ec16ccSHideki Saito } 962f2ec16ccSHideki Saito if (I.mayThrow()) 963f2ec16ccSHideki Saito return false; 964f2ec16ccSHideki Saito } 965f2ec16ccSHideki Saito 966f2ec16ccSHideki Saito return true; 967f2ec16ccSHideki Saito } 968f2ec16ccSHideki Saito 969f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeWithIfConvert() { 970f2ec16ccSHideki Saito if (!EnableIfConversion) { 9719e97caf5SRenato Golin reportVectorizationFailure("If-conversion is disabled", 9729e97caf5SRenato Golin "if-conversion is disabled", 973ec818d7fSHideki Saito "IfConversionDisabled", 974ec818d7fSHideki Saito ORE, TheLoop); 975f2ec16ccSHideki Saito return false; 976f2ec16ccSHideki Saito } 977f2ec16ccSHideki Saito 978f2ec16ccSHideki Saito assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); 979f2ec16ccSHideki Saito 980cf3b5559SPhilip Reames // A list of pointers which are known to be dereferenceable within scope of 981cf3b5559SPhilip Reames // the loop body for each iteration of the loop which executes. That is, 982cf3b5559SPhilip Reames // the memory pointed to can be dereferenced (with the access size implied by 983cf3b5559SPhilip Reames // the value's type) unconditionally within the loop header without 984cf3b5559SPhilip Reames // introducing a new fault. 985f2ec16ccSHideki Saito SmallPtrSet<Value *, 8> SafePointes; 986f2ec16ccSHideki Saito 987f2ec16ccSHideki Saito // Collect safe addresses. 988f2ec16ccSHideki Saito for (BasicBlock *BB : TheLoop->blocks()) { 9897403569bSPhilip Reames if (!blockNeedsPredication(BB)) { 990f2ec16ccSHideki Saito for (Instruction &I : *BB) 991f2ec16ccSHideki Saito if (auto *Ptr = getLoadStorePointerOperand(&I)) 992f2ec16ccSHideki Saito SafePointes.insert(Ptr); 9937403569bSPhilip Reames continue; 9947403569bSPhilip Reames } 9957403569bSPhilip Reames 9967403569bSPhilip Reames // For a block which requires predication, a address may be safe to access 9977403569bSPhilip Reames // in the loop w/o predication if we can prove dereferenceability facts 9987403569bSPhilip Reames // sufficient to ensure it'll never fault within the loop. For the moment, 9997403569bSPhilip Reames // we restrict this to loads; stores are more complicated due to 10007403569bSPhilip Reames // concurrency restrictions. 10017403569bSPhilip Reames ScalarEvolution &SE = *PSE.getSE(); 10027403569bSPhilip Reames for (Instruction &I : *BB) { 10037403569bSPhilip Reames LoadInst *LI = dyn_cast<LoadInst>(&I); 10047403569bSPhilip Reames if (LI && !mustSuppressSpeculation(*LI) && 10057403569bSPhilip Reames isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT)) 10067403569bSPhilip Reames SafePointes.insert(LI->getPointerOperand()); 10077403569bSPhilip Reames } 1008f2ec16ccSHideki Saito } 1009f2ec16ccSHideki Saito 1010f2ec16ccSHideki Saito // Collect the blocks that need predication. 1011f2ec16ccSHideki Saito BasicBlock *Header = TheLoop->getHeader(); 1012f2ec16ccSHideki Saito for (BasicBlock *BB : TheLoop->blocks()) { 1013f2ec16ccSHideki Saito // We don't support switch statements inside loops. 1014f2ec16ccSHideki Saito if (!isa<BranchInst>(BB->getTerminator())) { 10159e97caf5SRenato Golin reportVectorizationFailure("Loop contains a switch statement", 10169e97caf5SRenato Golin "loop contains a switch statement", 1017ec818d7fSHideki Saito "LoopContainsSwitch", ORE, TheLoop, 1018ec818d7fSHideki Saito BB->getTerminator()); 1019f2ec16ccSHideki Saito return false; 1020f2ec16ccSHideki Saito } 1021f2ec16ccSHideki Saito 1022f2ec16ccSHideki Saito // We must be able to predicate all blocks that need to be predicated. 1023f2ec16ccSHideki Saito if (blockNeedsPredication(BB)) { 1024f2ec16ccSHideki Saito if (!blockCanBePredicated(BB, SafePointes)) { 10259e97caf5SRenato Golin reportVectorizationFailure( 10269e97caf5SRenato Golin "Control flow cannot be substituted for a select", 10279e97caf5SRenato Golin "control flow cannot be substituted for a select", 1028ec818d7fSHideki Saito "NoCFGForSelect", ORE, TheLoop, 1029ec818d7fSHideki Saito BB->getTerminator()); 1030f2ec16ccSHideki Saito return false; 1031f2ec16ccSHideki Saito } 1032f2ec16ccSHideki Saito } else if (BB != Header && !canIfConvertPHINodes(BB)) { 10339e97caf5SRenato Golin reportVectorizationFailure( 10349e97caf5SRenato Golin "Control flow cannot be substituted for a select", 10359e97caf5SRenato Golin "control flow cannot be substituted for a select", 1036ec818d7fSHideki Saito "NoCFGForSelect", ORE, TheLoop, 1037ec818d7fSHideki Saito BB->getTerminator()); 1038f2ec16ccSHideki Saito return false; 1039f2ec16ccSHideki Saito } 1040f2ec16ccSHideki Saito } 1041f2ec16ccSHideki Saito 1042f2ec16ccSHideki Saito // We can if-convert this loop. 1043f2ec16ccSHideki Saito return true; 1044f2ec16ccSHideki Saito } 1045f2ec16ccSHideki Saito 1046f2ec16ccSHideki Saito // Helper function to canVectorizeLoopNestCFG. 1047f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, 1048f2ec16ccSHideki Saito bool UseVPlanNativePath) { 1049f2ec16ccSHideki Saito assert((UseVPlanNativePath || Lp->empty()) && 1050f2ec16ccSHideki Saito "VPlan-native path is not enabled."); 1051f2ec16ccSHideki Saito 1052f2ec16ccSHideki Saito // TODO: ORE should be improved to show more accurate information when an 1053f2ec16ccSHideki Saito // outer loop can't be vectorized because a nested loop is not understood or 1054f2ec16ccSHideki Saito // legal. Something like: "outer_loop_location: loop not vectorized: 1055f2ec16ccSHideki Saito // (inner_loop_location) loop control flow is not understood by vectorizer". 1056f2ec16ccSHideki Saito 1057f2ec16ccSHideki Saito // Store the result and return it at the end instead of exiting early, in case 1058f2ec16ccSHideki Saito // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1059f2ec16ccSHideki Saito bool Result = true; 1060f2ec16ccSHideki Saito bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1061f2ec16ccSHideki Saito 1062f2ec16ccSHideki Saito // We must have a loop in canonical form. Loops with indirectbr in them cannot 1063f2ec16ccSHideki Saito // be canonicalized. 1064f2ec16ccSHideki Saito if (!Lp->getLoopPreheader()) { 10659e97caf5SRenato Golin reportVectorizationFailure("Loop doesn't have a legal pre-header", 10669e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 1067ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 1068f2ec16ccSHideki Saito if (DoExtraAnalysis) 1069f2ec16ccSHideki Saito Result = false; 1070f2ec16ccSHideki Saito else 1071f2ec16ccSHideki Saito return false; 1072f2ec16ccSHideki Saito } 1073f2ec16ccSHideki Saito 1074f2ec16ccSHideki Saito // We must have a single backedge. 1075f2ec16ccSHideki Saito if (Lp->getNumBackEdges() != 1) { 10769e97caf5SRenato Golin reportVectorizationFailure("The loop must have a single backedge", 10779e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 1078ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 1079f2ec16ccSHideki Saito if (DoExtraAnalysis) 1080f2ec16ccSHideki Saito Result = false; 1081f2ec16ccSHideki Saito else 1082f2ec16ccSHideki Saito return false; 1083f2ec16ccSHideki Saito } 1084f2ec16ccSHideki Saito 1085f2ec16ccSHideki Saito // We must have a single exiting block. 1086f2ec16ccSHideki Saito if (!Lp->getExitingBlock()) { 10879e97caf5SRenato Golin reportVectorizationFailure("The loop must have an exiting block", 10889e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 1089ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 1090f2ec16ccSHideki Saito if (DoExtraAnalysis) 1091f2ec16ccSHideki Saito Result = false; 1092f2ec16ccSHideki Saito else 1093f2ec16ccSHideki Saito return false; 1094f2ec16ccSHideki Saito } 1095f2ec16ccSHideki Saito 1096f2ec16ccSHideki Saito // We only handle bottom-tested loops, i.e. loop in which the condition is 1097f2ec16ccSHideki Saito // checked at the end of each iteration. With that we can assume that all 1098f2ec16ccSHideki Saito // instructions in the loop are executed the same number of times. 1099f2ec16ccSHideki Saito if (Lp->getExitingBlock() != Lp->getLoopLatch()) { 11009e97caf5SRenato Golin reportVectorizationFailure("The exiting block is not the loop latch", 11019e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 1102ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 1103f2ec16ccSHideki Saito if (DoExtraAnalysis) 1104f2ec16ccSHideki Saito Result = false; 1105f2ec16ccSHideki Saito else 1106f2ec16ccSHideki Saito return false; 1107f2ec16ccSHideki Saito } 1108f2ec16ccSHideki Saito 1109f2ec16ccSHideki Saito return Result; 1110f2ec16ccSHideki Saito } 1111f2ec16ccSHideki Saito 1112f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopNestCFG( 1113f2ec16ccSHideki Saito Loop *Lp, bool UseVPlanNativePath) { 1114f2ec16ccSHideki Saito // Store the result and return it at the end instead of exiting early, in case 1115f2ec16ccSHideki Saito // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1116f2ec16ccSHideki Saito bool Result = true; 1117f2ec16ccSHideki Saito bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1118f2ec16ccSHideki Saito if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) { 1119f2ec16ccSHideki Saito if (DoExtraAnalysis) 1120f2ec16ccSHideki Saito Result = false; 1121f2ec16ccSHideki Saito else 1122f2ec16ccSHideki Saito return false; 1123f2ec16ccSHideki Saito } 1124f2ec16ccSHideki Saito 1125f2ec16ccSHideki Saito // Recursively check whether the loop control flow of nested loops is 1126f2ec16ccSHideki Saito // understood. 1127f2ec16ccSHideki Saito for (Loop *SubLp : *Lp) 1128f2ec16ccSHideki Saito if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) { 1129f2ec16ccSHideki Saito if (DoExtraAnalysis) 1130f2ec16ccSHideki Saito Result = false; 1131f2ec16ccSHideki Saito else 1132f2ec16ccSHideki Saito return false; 1133f2ec16ccSHideki Saito } 1134f2ec16ccSHideki Saito 1135f2ec16ccSHideki Saito return Result; 1136f2ec16ccSHideki Saito } 1137f2ec16ccSHideki Saito 1138f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { 1139f2ec16ccSHideki Saito // Store the result and return it at the end instead of exiting early, in case 1140f2ec16ccSHideki Saito // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1141f2ec16ccSHideki Saito bool Result = true; 1142f2ec16ccSHideki Saito 1143f2ec16ccSHideki Saito bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1144f2ec16ccSHideki Saito // Check whether the loop-related control flow in the loop nest is expected by 1145f2ec16ccSHideki Saito // vectorizer. 1146f2ec16ccSHideki Saito if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) { 1147f2ec16ccSHideki Saito if (DoExtraAnalysis) 1148f2ec16ccSHideki Saito Result = false; 1149f2ec16ccSHideki Saito else 1150f2ec16ccSHideki Saito return false; 1151f2ec16ccSHideki Saito } 1152f2ec16ccSHideki Saito 1153f2ec16ccSHideki Saito // We need to have a loop header. 1154d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() 1155f2ec16ccSHideki Saito << '\n'); 1156f2ec16ccSHideki Saito 1157f2ec16ccSHideki Saito // Specific checks for outer loops. We skip the remaining legal checks at this 1158f2ec16ccSHideki Saito // point because they don't support outer loops. 1159f2ec16ccSHideki Saito if (!TheLoop->empty()) { 1160f2ec16ccSHideki Saito assert(UseVPlanNativePath && "VPlan-native path is not enabled."); 1161f2ec16ccSHideki Saito 1162f2ec16ccSHideki Saito if (!canVectorizeOuterLoop()) { 11639e97caf5SRenato Golin reportVectorizationFailure("Unsupported outer loop", 11649e97caf5SRenato Golin "unsupported outer loop", 1165ec818d7fSHideki Saito "UnsupportedOuterLoop", 1166ec818d7fSHideki Saito ORE, TheLoop); 1167f2ec16ccSHideki Saito // TODO: Implement DoExtraAnalysis when subsequent legal checks support 1168f2ec16ccSHideki Saito // outer loops. 1169f2ec16ccSHideki Saito return false; 1170f2ec16ccSHideki Saito } 1171f2ec16ccSHideki Saito 1172d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n"); 1173f2ec16ccSHideki Saito return Result; 1174f2ec16ccSHideki Saito } 1175f2ec16ccSHideki Saito 1176f2ec16ccSHideki Saito assert(TheLoop->empty() && "Inner loop expected."); 1177f2ec16ccSHideki Saito // Check if we can if-convert non-single-bb loops. 1178f2ec16ccSHideki Saito unsigned NumBlocks = TheLoop->getNumBlocks(); 1179f2ec16ccSHideki Saito if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { 1180d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); 1181f2ec16ccSHideki Saito if (DoExtraAnalysis) 1182f2ec16ccSHideki Saito Result = false; 1183f2ec16ccSHideki Saito else 1184f2ec16ccSHideki Saito return false; 1185f2ec16ccSHideki Saito } 1186f2ec16ccSHideki Saito 1187f2ec16ccSHideki Saito // Check if we can vectorize the instructions and CFG in this loop. 1188f2ec16ccSHideki Saito if (!canVectorizeInstrs()) { 1189d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); 1190f2ec16ccSHideki Saito if (DoExtraAnalysis) 1191f2ec16ccSHideki Saito Result = false; 1192f2ec16ccSHideki Saito else 1193f2ec16ccSHideki Saito return false; 1194f2ec16ccSHideki Saito } 1195f2ec16ccSHideki Saito 1196f2ec16ccSHideki Saito // Go over each instruction and look at memory deps. 1197f2ec16ccSHideki Saito if (!canVectorizeMemory()) { 1198d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); 1199f2ec16ccSHideki Saito if (DoExtraAnalysis) 1200f2ec16ccSHideki Saito Result = false; 1201f2ec16ccSHideki Saito else 1202f2ec16ccSHideki Saito return false; 1203f2ec16ccSHideki Saito } 1204f2ec16ccSHideki Saito 1205d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" 1206f2ec16ccSHideki Saito << (LAI->getRuntimePointerChecking()->Need 1207f2ec16ccSHideki Saito ? " (with a runtime bound check)" 1208f2ec16ccSHideki Saito : "") 1209f2ec16ccSHideki Saito << "!\n"); 1210f2ec16ccSHideki Saito 1211f2ec16ccSHideki Saito unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; 1212f2ec16ccSHideki Saito if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) 1213f2ec16ccSHideki Saito SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; 1214f2ec16ccSHideki Saito 1215f2ec16ccSHideki Saito if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { 12169e97caf5SRenato Golin reportVectorizationFailure("Too many SCEV checks needed", 12179e97caf5SRenato Golin "Too many SCEV assumptions need to be made and checked at runtime", 1218ec818d7fSHideki Saito "TooManySCEVRunTimeChecks", ORE, TheLoop); 1219f2ec16ccSHideki Saito if (DoExtraAnalysis) 1220f2ec16ccSHideki Saito Result = false; 1221f2ec16ccSHideki Saito else 1222f2ec16ccSHideki Saito return false; 1223f2ec16ccSHideki Saito } 1224f2ec16ccSHideki Saito 1225f2ec16ccSHideki Saito // Okay! We've done all the tests. If any have failed, return false. Otherwise 1226f2ec16ccSHideki Saito // we can vectorize, and at this point we don't have any other mem analysis 1227f2ec16ccSHideki Saito // which may limit our maximum vectorization factor, so just return true with 1228f2ec16ccSHideki Saito // no restrictions. 1229f2ec16ccSHideki Saito return Result; 1230f2ec16ccSHideki Saito } 1231f2ec16ccSHideki Saito 1232d57d73daSDorit Nuzman bool LoopVectorizationLegality::prepareToFoldTailByMasking() { 1233b0b5312eSAyal Zaks 1234b0b5312eSAyal Zaks LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); 1235b0b5312eSAyal Zaks 1236b0b5312eSAyal Zaks if (!PrimaryInduction) { 12379e97caf5SRenato Golin reportVectorizationFailure( 12389e97caf5SRenato Golin "No primary induction, cannot fold tail by masking", 12399e97caf5SRenato Golin "Missing a primary induction variable in the loop, which is " 12409e97caf5SRenato Golin "needed in order to fold tail by masking as required.", 1241ec818d7fSHideki Saito "NoPrimaryInduction", ORE, TheLoop); 1242b0b5312eSAyal Zaks return false; 1243b0b5312eSAyal Zaks } 1244b0b5312eSAyal Zaks 1245d15df0edSAyal Zaks SmallPtrSet<const Value *, 8> ReductionLiveOuts; 1246b0b5312eSAyal Zaks 1247*d0d38df0SDavid Green for (auto &Reduction : getReductionVars()) 1248d15df0edSAyal Zaks ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); 1249d15df0edSAyal Zaks 1250d15df0edSAyal Zaks // TODO: handle non-reduction outside users when tail is folded by masking. 1251b0b5312eSAyal Zaks for (auto *AE : AllowedExit) { 1252d15df0edSAyal Zaks // Check that all users of allowed exit values are inside the loop or 1253d15df0edSAyal Zaks // are the live-out of a reduction. 1254d15df0edSAyal Zaks if (ReductionLiveOuts.count(AE)) 1255d15df0edSAyal Zaks continue; 1256b0b5312eSAyal Zaks for (User *U : AE->users()) { 1257b0b5312eSAyal Zaks Instruction *UI = cast<Instruction>(U); 1258b0b5312eSAyal Zaks if (TheLoop->contains(UI)) 1259b0b5312eSAyal Zaks continue; 12609e97caf5SRenato Golin reportVectorizationFailure( 12619e97caf5SRenato Golin "Cannot fold tail by masking, loop has an outside user for", 12629e97caf5SRenato Golin "Cannot fold tail by masking in the presence of live outs.", 1263ec818d7fSHideki Saito "LiveOutFoldingTailByMasking", ORE, TheLoop, UI); 1264b0b5312eSAyal Zaks return false; 1265b0b5312eSAyal Zaks } 1266b0b5312eSAyal Zaks } 1267b0b5312eSAyal Zaks 1268b0b5312eSAyal Zaks // The list of pointers that we can safely read and write to remains empty. 1269b0b5312eSAyal Zaks SmallPtrSet<Value *, 8> SafePointers; 1270b0b5312eSAyal Zaks 1271b0b5312eSAyal Zaks // Check and mark all blocks for predication, including those that ordinarily 1272b0b5312eSAyal Zaks // do not need predication such as the header block. 1273b0b5312eSAyal Zaks for (BasicBlock *BB : TheLoop->blocks()) { 1274d57d73daSDorit Nuzman if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) { 12759e97caf5SRenato Golin reportVectorizationFailure( 12769e97caf5SRenato Golin "Cannot fold tail by masking as required", 12779e97caf5SRenato Golin "control flow cannot be substituted for a select", 1278ec818d7fSHideki Saito "NoCFGForSelect", ORE, TheLoop, 1279ec818d7fSHideki Saito BB->getTerminator()); 1280b0b5312eSAyal Zaks return false; 1281b0b5312eSAyal Zaks } 1282b0b5312eSAyal Zaks } 1283b0b5312eSAyal Zaks 1284b0b5312eSAyal Zaks LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n"); 1285b0b5312eSAyal Zaks return true; 1286b0b5312eSAyal Zaks } 1287b0b5312eSAyal Zaks 1288f2ec16ccSHideki Saito } // namespace llvm 1289