1f2ec16ccSHideki Saito //===- LoopVectorizationLegality.cpp --------------------------------------===// 2f2ec16ccSHideki Saito // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6f2ec16ccSHideki Saito // 7f2ec16ccSHideki Saito //===----------------------------------------------------------------------===// 8f2ec16ccSHideki Saito // 9f2ec16ccSHideki Saito // This file provides loop vectorization legality analysis. Original code 10f2ec16ccSHideki Saito // resided in LoopVectorize.cpp for a long time. 11f2ec16ccSHideki Saito // 12f2ec16ccSHideki Saito // At this point, it is implemented as a utility class, not as an analysis 13f2ec16ccSHideki Saito // pass. It should be easy to create an analysis pass around it if there 14f2ec16ccSHideki Saito // is a need (but D45420 needs to happen first). 15f2ec16ccSHideki Saito // 16f2ec16ccSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" 177403569bSPhilip Reames #include "llvm/Analysis/Loads.h" 18*a5f1f9c9SSimon Pilgrim #include "llvm/Analysis/LoopInfo.h" 197403569bSPhilip Reames #include "llvm/Analysis/ValueTracking.h" 20f2ec16ccSHideki Saito #include "llvm/Analysis/VectorUtils.h" 21f2ec16ccSHideki Saito #include "llvm/IR/IntrinsicInst.h" 2223c11380SFlorian Hahn #include "llvm/IR/PatternMatch.h" 2323c11380SFlorian Hahn #include "llvm/Transforms/Vectorize/LoopVectorize.h" 24f2ec16ccSHideki Saito 25f2ec16ccSHideki Saito using namespace llvm; 2623c11380SFlorian Hahn using namespace PatternMatch; 27f2ec16ccSHideki Saito 28f2ec16ccSHideki Saito #define LV_NAME "loop-vectorize" 29f2ec16ccSHideki Saito #define DEBUG_TYPE LV_NAME 30f2ec16ccSHideki Saito 314e4ecae0SHideki Saito extern cl::opt<bool> EnableVPlanPredication; 324e4ecae0SHideki Saito 33f2ec16ccSHideki Saito static cl::opt<bool> 34f2ec16ccSHideki Saito EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, 35f2ec16ccSHideki Saito cl::desc("Enable if-conversion during vectorization.")); 36f2ec16ccSHideki Saito 37f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( 38f2ec16ccSHideki Saito "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, 39f2ec16ccSHideki Saito cl::desc("The maximum allowed number of runtime memory checks with a " 40f2ec16ccSHideki Saito "vectorize(enable) pragma.")); 41f2ec16ccSHideki Saito 42f2ec16ccSHideki Saito static cl::opt<unsigned> VectorizeSCEVCheckThreshold( 43f2ec16ccSHideki Saito "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, 44f2ec16ccSHideki Saito cl::desc("The maximum number of SCEV checks allowed.")); 45f2ec16ccSHideki Saito 46f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold( 47f2ec16ccSHideki Saito "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, 48f2ec16ccSHideki Saito cl::desc("The maximum number of SCEV checks allowed with a " 49f2ec16ccSHideki Saito "vectorize(enable) pragma")); 50f2ec16ccSHideki Saito 51f2ec16ccSHideki Saito /// Maximum vectorization interleave count. 52f2ec16ccSHideki Saito static const unsigned MaxInterleaveFactor = 16; 53f2ec16ccSHideki Saito 54f2ec16ccSHideki Saito namespace llvm { 55f2ec16ccSHideki Saito 56f2ec16ccSHideki Saito bool LoopVectorizeHints::Hint::validate(unsigned Val) { 57f2ec16ccSHideki Saito switch (Kind) { 58f2ec16ccSHideki Saito case HK_WIDTH: 59f2ec16ccSHideki Saito return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; 60f2ec16ccSHideki Saito case HK_UNROLL: 61f2ec16ccSHideki Saito return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; 62f2ec16ccSHideki Saito case HK_FORCE: 63f2ec16ccSHideki Saito return (Val <= 1); 64f2ec16ccSHideki Saito case HK_ISVECTORIZED: 6520b198ecSSjoerd Meijer case HK_PREDICATE: 66f2ec16ccSHideki Saito return (Val == 0 || Val == 1); 67f2ec16ccSHideki Saito } 68f2ec16ccSHideki Saito return false; 69f2ec16ccSHideki Saito } 70f2ec16ccSHideki Saito 71d4eb13c8SMichael Kruse LoopVectorizeHints::LoopVectorizeHints(const Loop *L, 72d4eb13c8SMichael Kruse bool InterleaveOnlyWhenForced, 73f2ec16ccSHideki Saito OptimizationRemarkEmitter &ORE) 74f2ec16ccSHideki Saito : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), 75d4eb13c8SMichael Kruse Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), 76f2ec16ccSHideki Saito Force("vectorize.enable", FK_Undefined, HK_FORCE), 7720b198ecSSjoerd Meijer IsVectorized("isvectorized", 0, HK_ISVECTORIZED), 78cb47b878SSjoerd Meijer Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L), 7920b198ecSSjoerd Meijer ORE(ORE) { 80f2ec16ccSHideki Saito // Populate values with existing loop metadata. 81f2ec16ccSHideki Saito getHintsFromMetadata(); 82f2ec16ccSHideki Saito 83f2ec16ccSHideki Saito // force-vector-interleave overrides DisableInterleaving. 84f2ec16ccSHideki Saito if (VectorizerParams::isInterleaveForced()) 85f2ec16ccSHideki Saito Interleave.Value = VectorizerParams::VectorizationInterleave; 86f2ec16ccSHideki Saito 87f2ec16ccSHideki Saito if (IsVectorized.Value != 1) 88f2ec16ccSHideki Saito // If the vectorization width and interleaving count are both 1 then 89f2ec16ccSHideki Saito // consider the loop to have been already vectorized because there's 90f2ec16ccSHideki Saito // nothing more that we can do. 91f2ec16ccSHideki Saito IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; 92d4eb13c8SMichael Kruse LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() 93f2ec16ccSHideki Saito << "LV: Interleaving disabled by the pass manager\n"); 94f2ec16ccSHideki Saito } 95f2ec16ccSHideki Saito 9677a614a6SMichael Kruse void LoopVectorizeHints::setAlreadyVectorized() { 9777a614a6SMichael Kruse LLVMContext &Context = TheLoop->getHeader()->getContext(); 9877a614a6SMichael Kruse 9977a614a6SMichael Kruse MDNode *IsVectorizedMD = MDNode::get( 10077a614a6SMichael Kruse Context, 10177a614a6SMichael Kruse {MDString::get(Context, "llvm.loop.isvectorized"), 10277a614a6SMichael Kruse ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))}); 10377a614a6SMichael Kruse MDNode *LoopID = TheLoop->getLoopID(); 10477a614a6SMichael Kruse MDNode *NewLoopID = 10577a614a6SMichael Kruse makePostTransformationMetadata(Context, LoopID, 10677a614a6SMichael Kruse {Twine(Prefix(), "vectorize.").str(), 10777a614a6SMichael Kruse Twine(Prefix(), "interleave.").str()}, 10877a614a6SMichael Kruse {IsVectorizedMD}); 10977a614a6SMichael Kruse TheLoop->setLoopID(NewLoopID); 11077a614a6SMichael Kruse 11177a614a6SMichael Kruse // Update internal cache. 11277a614a6SMichael Kruse IsVectorized.Value = 1; 11377a614a6SMichael Kruse } 11477a614a6SMichael Kruse 115d4eb13c8SMichael Kruse bool LoopVectorizeHints::allowVectorization( 116d4eb13c8SMichael Kruse Function *F, Loop *L, bool VectorizeOnlyWhenForced) const { 117f2ec16ccSHideki Saito if (getForce() == LoopVectorizeHints::FK_Disabled) { 118d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); 119f2ec16ccSHideki Saito emitRemarkWithHints(); 120f2ec16ccSHideki Saito return false; 121f2ec16ccSHideki Saito } 122f2ec16ccSHideki Saito 123d4eb13c8SMichael Kruse if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) { 124d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); 125f2ec16ccSHideki Saito emitRemarkWithHints(); 126f2ec16ccSHideki Saito return false; 127f2ec16ccSHideki Saito } 128f2ec16ccSHideki Saito 129f2ec16ccSHideki Saito if (getIsVectorized() == 1) { 130d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); 131f2ec16ccSHideki Saito // FIXME: Add interleave.disable metadata. This will allow 132f2ec16ccSHideki Saito // vectorize.disable to be used without disabling the pass and errors 133f2ec16ccSHideki Saito // to differentiate between disabled vectorization and a width of 1. 134f2ec16ccSHideki Saito ORE.emit([&]() { 135f2ec16ccSHideki Saito return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), 136f2ec16ccSHideki Saito "AllDisabled", L->getStartLoc(), 137f2ec16ccSHideki Saito L->getHeader()) 138f2ec16ccSHideki Saito << "loop not vectorized: vectorization and interleaving are " 139f2ec16ccSHideki Saito "explicitly disabled, or the loop has already been " 140f2ec16ccSHideki Saito "vectorized"; 141f2ec16ccSHideki Saito }); 142f2ec16ccSHideki Saito return false; 143f2ec16ccSHideki Saito } 144f2ec16ccSHideki Saito 145f2ec16ccSHideki Saito return true; 146f2ec16ccSHideki Saito } 147f2ec16ccSHideki Saito 148f2ec16ccSHideki Saito void LoopVectorizeHints::emitRemarkWithHints() const { 149f2ec16ccSHideki Saito using namespace ore; 150f2ec16ccSHideki Saito 151f2ec16ccSHideki Saito ORE.emit([&]() { 152f2ec16ccSHideki Saito if (Force.Value == LoopVectorizeHints::FK_Disabled) 153f2ec16ccSHideki Saito return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", 154f2ec16ccSHideki Saito TheLoop->getStartLoc(), 155f2ec16ccSHideki Saito TheLoop->getHeader()) 156f2ec16ccSHideki Saito << "loop not vectorized: vectorization is explicitly disabled"; 157f2ec16ccSHideki Saito else { 158f2ec16ccSHideki Saito OptimizationRemarkMissed R(LV_NAME, "MissedDetails", 159f2ec16ccSHideki Saito TheLoop->getStartLoc(), TheLoop->getHeader()); 160f2ec16ccSHideki Saito R << "loop not vectorized"; 161f2ec16ccSHideki Saito if (Force.Value == LoopVectorizeHints::FK_Enabled) { 162f2ec16ccSHideki Saito R << " (Force=" << NV("Force", true); 163f2ec16ccSHideki Saito if (Width.Value != 0) 164f2ec16ccSHideki Saito R << ", Vector Width=" << NV("VectorWidth", Width.Value); 165f2ec16ccSHideki Saito if (Interleave.Value != 0) 166f2ec16ccSHideki Saito R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); 167f2ec16ccSHideki Saito R << ")"; 168f2ec16ccSHideki Saito } 169f2ec16ccSHideki Saito return R; 170f2ec16ccSHideki Saito } 171f2ec16ccSHideki Saito }); 172f2ec16ccSHideki Saito } 173f2ec16ccSHideki Saito 174f2ec16ccSHideki Saito const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { 175f2ec16ccSHideki Saito if (getWidth() == 1) 176f2ec16ccSHideki Saito return LV_NAME; 177f2ec16ccSHideki Saito if (getForce() == LoopVectorizeHints::FK_Disabled) 178f2ec16ccSHideki Saito return LV_NAME; 179f2ec16ccSHideki Saito if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) 180f2ec16ccSHideki Saito return LV_NAME; 181f2ec16ccSHideki Saito return OptimizationRemarkAnalysis::AlwaysPrint; 182f2ec16ccSHideki Saito } 183f2ec16ccSHideki Saito 184f2ec16ccSHideki Saito void LoopVectorizeHints::getHintsFromMetadata() { 185f2ec16ccSHideki Saito MDNode *LoopID = TheLoop->getLoopID(); 186f2ec16ccSHideki Saito if (!LoopID) 187f2ec16ccSHideki Saito return; 188f2ec16ccSHideki Saito 189f2ec16ccSHideki Saito // First operand should refer to the loop id itself. 190f2ec16ccSHideki Saito assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 191f2ec16ccSHideki Saito assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 192f2ec16ccSHideki Saito 193f2ec16ccSHideki Saito for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { 194f2ec16ccSHideki Saito const MDString *S = nullptr; 195f2ec16ccSHideki Saito SmallVector<Metadata *, 4> Args; 196f2ec16ccSHideki Saito 197f2ec16ccSHideki Saito // The expected hint is either a MDString or a MDNode with the first 198f2ec16ccSHideki Saito // operand a MDString. 199f2ec16ccSHideki Saito if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) { 200f2ec16ccSHideki Saito if (!MD || MD->getNumOperands() == 0) 201f2ec16ccSHideki Saito continue; 202f2ec16ccSHideki Saito S = dyn_cast<MDString>(MD->getOperand(0)); 203f2ec16ccSHideki Saito for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) 204f2ec16ccSHideki Saito Args.push_back(MD->getOperand(i)); 205f2ec16ccSHideki Saito } else { 206f2ec16ccSHideki Saito S = dyn_cast<MDString>(LoopID->getOperand(i)); 207f2ec16ccSHideki Saito assert(Args.size() == 0 && "too many arguments for MDString"); 208f2ec16ccSHideki Saito } 209f2ec16ccSHideki Saito 210f2ec16ccSHideki Saito if (!S) 211f2ec16ccSHideki Saito continue; 212f2ec16ccSHideki Saito 213f2ec16ccSHideki Saito // Check if the hint starts with the loop metadata prefix. 214f2ec16ccSHideki Saito StringRef Name = S->getString(); 215f2ec16ccSHideki Saito if (Args.size() == 1) 216f2ec16ccSHideki Saito setHint(Name, Args[0]); 217f2ec16ccSHideki Saito } 218f2ec16ccSHideki Saito } 219f2ec16ccSHideki Saito 220f2ec16ccSHideki Saito void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { 221f2ec16ccSHideki Saito if (!Name.startswith(Prefix())) 222f2ec16ccSHideki Saito return; 223f2ec16ccSHideki Saito Name = Name.substr(Prefix().size(), StringRef::npos); 224f2ec16ccSHideki Saito 225f2ec16ccSHideki Saito const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg); 226f2ec16ccSHideki Saito if (!C) 227f2ec16ccSHideki Saito return; 228f2ec16ccSHideki Saito unsigned Val = C->getZExtValue(); 229f2ec16ccSHideki Saito 23020b198ecSSjoerd Meijer Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; 231f2ec16ccSHideki Saito for (auto H : Hints) { 232f2ec16ccSHideki Saito if (Name == H->Name) { 233f2ec16ccSHideki Saito if (H->validate(Val)) 234f2ec16ccSHideki Saito H->Value = Val; 235f2ec16ccSHideki Saito else 236d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); 237f2ec16ccSHideki Saito break; 238f2ec16ccSHideki Saito } 239f2ec16ccSHideki Saito } 240f2ec16ccSHideki Saito } 241f2ec16ccSHideki Saito 242f2ec16ccSHideki Saito bool LoopVectorizationRequirements::doesNotMeet( 243f2ec16ccSHideki Saito Function *F, Loop *L, const LoopVectorizeHints &Hints) { 244f2ec16ccSHideki Saito const char *PassName = Hints.vectorizeAnalysisPassName(); 245f2ec16ccSHideki Saito bool Failed = false; 246f2ec16ccSHideki Saito if (UnsafeAlgebraInst && !Hints.allowReordering()) { 247f2ec16ccSHideki Saito ORE.emit([&]() { 248f2ec16ccSHideki Saito return OptimizationRemarkAnalysisFPCommute( 249f2ec16ccSHideki Saito PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(), 250f2ec16ccSHideki Saito UnsafeAlgebraInst->getParent()) 251f2ec16ccSHideki Saito << "loop not vectorized: cannot prove it is safe to reorder " 252f2ec16ccSHideki Saito "floating-point operations"; 253f2ec16ccSHideki Saito }); 254f2ec16ccSHideki Saito Failed = true; 255f2ec16ccSHideki Saito } 256f2ec16ccSHideki Saito 257f2ec16ccSHideki Saito // Test if runtime memcheck thresholds are exceeded. 258f2ec16ccSHideki Saito bool PragmaThresholdReached = 259f2ec16ccSHideki Saito NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; 260f2ec16ccSHideki Saito bool ThresholdReached = 261f2ec16ccSHideki Saito NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; 262f2ec16ccSHideki Saito if ((ThresholdReached && !Hints.allowReordering()) || 263f2ec16ccSHideki Saito PragmaThresholdReached) { 264f2ec16ccSHideki Saito ORE.emit([&]() { 265f2ec16ccSHideki Saito return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", 266f2ec16ccSHideki Saito L->getStartLoc(), 267f2ec16ccSHideki Saito L->getHeader()) 268f2ec16ccSHideki Saito << "loop not vectorized: cannot prove it is safe to reorder " 269f2ec16ccSHideki Saito "memory operations"; 270f2ec16ccSHideki Saito }); 271d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); 272f2ec16ccSHideki Saito Failed = true; 273f2ec16ccSHideki Saito } 274f2ec16ccSHideki Saito 275f2ec16ccSHideki Saito return Failed; 276f2ec16ccSHideki Saito } 277f2ec16ccSHideki Saito 278f2ec16ccSHideki Saito // Return true if the inner loop \p Lp is uniform with regard to the outer loop 279f2ec16ccSHideki Saito // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes 280f2ec16ccSHideki Saito // executing the inner loop will execute the same iterations). This check is 281f2ec16ccSHideki Saito // very constrained for now but it will be relaxed in the future. \p Lp is 282f2ec16ccSHideki Saito // considered uniform if it meets all the following conditions: 283f2ec16ccSHideki Saito // 1) it has a canonical IV (starting from 0 and with stride 1), 284f2ec16ccSHideki Saito // 2) its latch terminator is a conditional branch and, 285f2ec16ccSHideki Saito // 3) its latch condition is a compare instruction whose operands are the 286f2ec16ccSHideki Saito // canonical IV and an OuterLp invariant. 287f2ec16ccSHideki Saito // This check doesn't take into account the uniformity of other conditions not 288f2ec16ccSHideki Saito // related to the loop latch because they don't affect the loop uniformity. 289f2ec16ccSHideki Saito // 290f2ec16ccSHideki Saito // NOTE: We decided to keep all these checks and its associated documentation 291f2ec16ccSHideki Saito // together so that we can easily have a picture of the current supported loop 292f2ec16ccSHideki Saito // nests. However, some of the current checks don't depend on \p OuterLp and 293f2ec16ccSHideki Saito // would be redundantly executed for each \p Lp if we invoked this function for 294f2ec16ccSHideki Saito // different candidate outer loops. This is not the case for now because we 295f2ec16ccSHideki Saito // don't currently have the infrastructure to evaluate multiple candidate outer 296f2ec16ccSHideki Saito // loops and \p OuterLp will be a fixed parameter while we only support explicit 297f2ec16ccSHideki Saito // outer loop vectorization. It's also very likely that these checks go away 298f2ec16ccSHideki Saito // before introducing the aforementioned infrastructure. However, if this is not 299f2ec16ccSHideki Saito // the case, we should move the \p OuterLp independent checks to a separate 300f2ec16ccSHideki Saito // function that is only executed once for each \p Lp. 301f2ec16ccSHideki Saito static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { 302f2ec16ccSHideki Saito assert(Lp->getLoopLatch() && "Expected loop with a single latch."); 303f2ec16ccSHideki Saito 304f2ec16ccSHideki Saito // If Lp is the outer loop, it's uniform by definition. 305f2ec16ccSHideki Saito if (Lp == OuterLp) 306f2ec16ccSHideki Saito return true; 307f2ec16ccSHideki Saito assert(OuterLp->contains(Lp) && "OuterLp must contain Lp."); 308f2ec16ccSHideki Saito 309f2ec16ccSHideki Saito // 1. 310f2ec16ccSHideki Saito PHINode *IV = Lp->getCanonicalInductionVariable(); 311f2ec16ccSHideki Saito if (!IV) { 312d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n"); 313f2ec16ccSHideki Saito return false; 314f2ec16ccSHideki Saito } 315f2ec16ccSHideki Saito 316f2ec16ccSHideki Saito // 2. 317f2ec16ccSHideki Saito BasicBlock *Latch = Lp->getLoopLatch(); 318f2ec16ccSHideki Saito auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator()); 319f2ec16ccSHideki Saito if (!LatchBr || LatchBr->isUnconditional()) { 320d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n"); 321f2ec16ccSHideki Saito return false; 322f2ec16ccSHideki Saito } 323f2ec16ccSHideki Saito 324f2ec16ccSHideki Saito // 3. 325f2ec16ccSHideki Saito auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition()); 326f2ec16ccSHideki Saito if (!LatchCmp) { 327d34e60caSNicola Zaghen LLVM_DEBUG( 328d34e60caSNicola Zaghen dbgs() << "LV: Loop latch condition is not a compare instruction.\n"); 329f2ec16ccSHideki Saito return false; 330f2ec16ccSHideki Saito } 331f2ec16ccSHideki Saito 332f2ec16ccSHideki Saito Value *CondOp0 = LatchCmp->getOperand(0); 333f2ec16ccSHideki Saito Value *CondOp1 = LatchCmp->getOperand(1); 334f2ec16ccSHideki Saito Value *IVUpdate = IV->getIncomingValueForBlock(Latch); 335f2ec16ccSHideki Saito if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) && 336f2ec16ccSHideki Saito !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) { 337d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n"); 338f2ec16ccSHideki Saito return false; 339f2ec16ccSHideki Saito } 340f2ec16ccSHideki Saito 341f2ec16ccSHideki Saito return true; 342f2ec16ccSHideki Saito } 343f2ec16ccSHideki Saito 344f2ec16ccSHideki Saito // Return true if \p Lp and all its nested loops are uniform with regard to \p 345f2ec16ccSHideki Saito // OuterLp. 346f2ec16ccSHideki Saito static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) { 347f2ec16ccSHideki Saito if (!isUniformLoop(Lp, OuterLp)) 348f2ec16ccSHideki Saito return false; 349f2ec16ccSHideki Saito 350f2ec16ccSHideki Saito // Check if nested loops are uniform. 351f2ec16ccSHideki Saito for (Loop *SubLp : *Lp) 352f2ec16ccSHideki Saito if (!isUniformLoopNest(SubLp, OuterLp)) 353f2ec16ccSHideki Saito return false; 354f2ec16ccSHideki Saito 355f2ec16ccSHideki Saito return true; 356f2ec16ccSHideki Saito } 357f2ec16ccSHideki Saito 3585f8f34e4SAdrian Prantl /// Check whether it is safe to if-convert this phi node. 359f2ec16ccSHideki Saito /// 360f2ec16ccSHideki Saito /// Phi nodes with constant expressions that can trap are not safe to if 361f2ec16ccSHideki Saito /// convert. 362f2ec16ccSHideki Saito static bool canIfConvertPHINodes(BasicBlock *BB) { 363f2ec16ccSHideki Saito for (PHINode &Phi : BB->phis()) { 364f2ec16ccSHideki Saito for (Value *V : Phi.incoming_values()) 365f2ec16ccSHideki Saito if (auto *C = dyn_cast<Constant>(V)) 366f2ec16ccSHideki Saito if (C->canTrap()) 367f2ec16ccSHideki Saito return false; 368f2ec16ccSHideki Saito } 369f2ec16ccSHideki Saito return true; 370f2ec16ccSHideki Saito } 371f2ec16ccSHideki Saito 372f2ec16ccSHideki Saito static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { 373f2ec16ccSHideki Saito if (Ty->isPointerTy()) 374f2ec16ccSHideki Saito return DL.getIntPtrType(Ty); 375f2ec16ccSHideki Saito 376f2ec16ccSHideki Saito // It is possible that char's or short's overflow when we ask for the loop's 377f2ec16ccSHideki Saito // trip count, work around this by changing the type size. 378f2ec16ccSHideki Saito if (Ty->getScalarSizeInBits() < 32) 379f2ec16ccSHideki Saito return Type::getInt32Ty(Ty->getContext()); 380f2ec16ccSHideki Saito 381f2ec16ccSHideki Saito return Ty; 382f2ec16ccSHideki Saito } 383f2ec16ccSHideki Saito 384f2ec16ccSHideki Saito static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { 385f2ec16ccSHideki Saito Ty0 = convertPointerToIntegerType(DL, Ty0); 386f2ec16ccSHideki Saito Ty1 = convertPointerToIntegerType(DL, Ty1); 387f2ec16ccSHideki Saito if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) 388f2ec16ccSHideki Saito return Ty0; 389f2ec16ccSHideki Saito return Ty1; 390f2ec16ccSHideki Saito } 391f2ec16ccSHideki Saito 3925f8f34e4SAdrian Prantl /// Check that the instruction has outside loop users and is not an 393f2ec16ccSHideki Saito /// identified reduction variable. 394f2ec16ccSHideki Saito static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, 395f2ec16ccSHideki Saito SmallPtrSetImpl<Value *> &AllowedExit) { 39660a1e4ddSAnna Thomas // Reductions, Inductions and non-header phis are allowed to have exit users. All 397f2ec16ccSHideki Saito // other instructions must not have external users. 398f2ec16ccSHideki Saito if (!AllowedExit.count(Inst)) 399f2ec16ccSHideki Saito // Check that all of the users of the loop are inside the BB. 400f2ec16ccSHideki Saito for (User *U : Inst->users()) { 401f2ec16ccSHideki Saito Instruction *UI = cast<Instruction>(U); 402f2ec16ccSHideki Saito // This user may be a reduction exit value. 403f2ec16ccSHideki Saito if (!TheLoop->contains(UI)) { 404d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); 405f2ec16ccSHideki Saito return true; 406f2ec16ccSHideki Saito } 407f2ec16ccSHideki Saito } 408f2ec16ccSHideki Saito return false; 409f2ec16ccSHideki Saito } 410f2ec16ccSHideki Saito 411f2ec16ccSHideki Saito int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { 412f2ec16ccSHideki Saito const ValueToValueMap &Strides = 413f2ec16ccSHideki Saito getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); 414f2ec16ccSHideki Saito 415d1170dbeSSjoerd Meijer bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize(); 416d1170dbeSSjoerd Meijer int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false); 417f2ec16ccSHideki Saito if (Stride == 1 || Stride == -1) 418f2ec16ccSHideki Saito return Stride; 419f2ec16ccSHideki Saito return 0; 420f2ec16ccSHideki Saito } 421f2ec16ccSHideki Saito 422f2ec16ccSHideki Saito bool LoopVectorizationLegality::isUniform(Value *V) { 423f2ec16ccSHideki Saito return LAI->isUniform(V); 424f2ec16ccSHideki Saito } 425f2ec16ccSHideki Saito 426f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeOuterLoop() { 427f2ec16ccSHideki Saito assert(!TheLoop->empty() && "We are not vectorizing an outer loop."); 428f2ec16ccSHideki Saito // Store the result and return it at the end instead of exiting early, in case 429f2ec16ccSHideki Saito // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 430f2ec16ccSHideki Saito bool Result = true; 431f2ec16ccSHideki Saito bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 432f2ec16ccSHideki Saito 433f2ec16ccSHideki Saito for (BasicBlock *BB : TheLoop->blocks()) { 434f2ec16ccSHideki Saito // Check whether the BB terminator is a BranchInst. Any other terminator is 435f2ec16ccSHideki Saito // not supported yet. 436f2ec16ccSHideki Saito auto *Br = dyn_cast<BranchInst>(BB->getTerminator()); 437f2ec16ccSHideki Saito if (!Br) { 4389e97caf5SRenato Golin reportVectorizationFailure("Unsupported basic block terminator", 4399e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 440ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 441f2ec16ccSHideki Saito if (DoExtraAnalysis) 442f2ec16ccSHideki Saito Result = false; 443f2ec16ccSHideki Saito else 444f2ec16ccSHideki Saito return false; 445f2ec16ccSHideki Saito } 446f2ec16ccSHideki Saito 447f2ec16ccSHideki Saito // Check whether the BranchInst is a supported one. Only unconditional 448f2ec16ccSHideki Saito // branches, conditional branches with an outer loop invariant condition or 449f2ec16ccSHideki Saito // backedges are supported. 4504e4ecae0SHideki Saito // FIXME: We skip these checks when VPlan predication is enabled as we 4514e4ecae0SHideki Saito // want to allow divergent branches. This whole check will be removed 4524e4ecae0SHideki Saito // once VPlan predication is on by default. 4534e4ecae0SHideki Saito if (!EnableVPlanPredication && Br && Br->isConditional() && 454f2ec16ccSHideki Saito !TheLoop->isLoopInvariant(Br->getCondition()) && 455f2ec16ccSHideki Saito !LI->isLoopHeader(Br->getSuccessor(0)) && 456f2ec16ccSHideki Saito !LI->isLoopHeader(Br->getSuccessor(1))) { 4579e97caf5SRenato Golin reportVectorizationFailure("Unsupported conditional branch", 4589e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 459ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 460f2ec16ccSHideki Saito if (DoExtraAnalysis) 461f2ec16ccSHideki Saito Result = false; 462f2ec16ccSHideki Saito else 463f2ec16ccSHideki Saito return false; 464f2ec16ccSHideki Saito } 465f2ec16ccSHideki Saito } 466f2ec16ccSHideki Saito 467f2ec16ccSHideki Saito // Check whether inner loops are uniform. At this point, we only support 468f2ec16ccSHideki Saito // simple outer loops scenarios with uniform nested loops. 469f2ec16ccSHideki Saito if (!isUniformLoopNest(TheLoop /*loop nest*/, 470f2ec16ccSHideki Saito TheLoop /*context outer loop*/)) { 4719e97caf5SRenato Golin reportVectorizationFailure("Outer loop contains divergent loops", 4729e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 473ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 474f2ec16ccSHideki Saito if (DoExtraAnalysis) 475f2ec16ccSHideki Saito Result = false; 476f2ec16ccSHideki Saito else 477f2ec16ccSHideki Saito return false; 478f2ec16ccSHideki Saito } 479f2ec16ccSHideki Saito 480ea7f3035SHideki Saito // Check whether we are able to set up outer loop induction. 481ea7f3035SHideki Saito if (!setupOuterLoopInductions()) { 4829e97caf5SRenato Golin reportVectorizationFailure("Unsupported outer loop Phi(s)", 4839e97caf5SRenato Golin "Unsupported outer loop Phi(s)", 484ec818d7fSHideki Saito "UnsupportedPhi", ORE, TheLoop); 485ea7f3035SHideki Saito if (DoExtraAnalysis) 486ea7f3035SHideki Saito Result = false; 487ea7f3035SHideki Saito else 488ea7f3035SHideki Saito return false; 489ea7f3035SHideki Saito } 490ea7f3035SHideki Saito 491f2ec16ccSHideki Saito return Result; 492f2ec16ccSHideki Saito } 493f2ec16ccSHideki Saito 494f2ec16ccSHideki Saito void LoopVectorizationLegality::addInductionPhi( 495f2ec16ccSHideki Saito PHINode *Phi, const InductionDescriptor &ID, 496f2ec16ccSHideki Saito SmallPtrSetImpl<Value *> &AllowedExit) { 497f2ec16ccSHideki Saito Inductions[Phi] = ID; 498f2ec16ccSHideki Saito 499f2ec16ccSHideki Saito // In case this induction also comes with casts that we know we can ignore 500f2ec16ccSHideki Saito // in the vectorized loop body, record them here. All casts could be recorded 501f2ec16ccSHideki Saito // here for ignoring, but suffices to record only the first (as it is the 502f2ec16ccSHideki Saito // only one that may bw used outside the cast sequence). 503f2ec16ccSHideki Saito const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts(); 504f2ec16ccSHideki Saito if (!Casts.empty()) 505f2ec16ccSHideki Saito InductionCastsToIgnore.insert(*Casts.begin()); 506f2ec16ccSHideki Saito 507f2ec16ccSHideki Saito Type *PhiTy = Phi->getType(); 508f2ec16ccSHideki Saito const DataLayout &DL = Phi->getModule()->getDataLayout(); 509f2ec16ccSHideki Saito 510f2ec16ccSHideki Saito // Get the widest type. 511f2ec16ccSHideki Saito if (!PhiTy->isFloatingPointTy()) { 512f2ec16ccSHideki Saito if (!WidestIndTy) 513f2ec16ccSHideki Saito WidestIndTy = convertPointerToIntegerType(DL, PhiTy); 514f2ec16ccSHideki Saito else 515f2ec16ccSHideki Saito WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); 516f2ec16ccSHideki Saito } 517f2ec16ccSHideki Saito 518f2ec16ccSHideki Saito // Int inductions are special because we only allow one IV. 519f2ec16ccSHideki Saito if (ID.getKind() == InductionDescriptor::IK_IntInduction && 520f2ec16ccSHideki Saito ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && 521f2ec16ccSHideki Saito isa<Constant>(ID.getStartValue()) && 522f2ec16ccSHideki Saito cast<Constant>(ID.getStartValue())->isNullValue()) { 523f2ec16ccSHideki Saito 524f2ec16ccSHideki Saito // Use the phi node with the widest type as induction. Use the last 525f2ec16ccSHideki Saito // one if there are multiple (no good reason for doing this other 526f2ec16ccSHideki Saito // than it is expedient). We've checked that it begins at zero and 527f2ec16ccSHideki Saito // steps by one, so this is a canonical induction variable. 528f2ec16ccSHideki Saito if (!PrimaryInduction || PhiTy == WidestIndTy) 529f2ec16ccSHideki Saito PrimaryInduction = Phi; 530f2ec16ccSHideki Saito } 531f2ec16ccSHideki Saito 532f2ec16ccSHideki Saito // Both the PHI node itself, and the "post-increment" value feeding 533f2ec16ccSHideki Saito // back into the PHI node may have external users. 534f2ec16ccSHideki Saito // We can allow those uses, except if the SCEVs we have for them rely 535f2ec16ccSHideki Saito // on predicates that only hold within the loop, since allowing the exit 5366a1dd77fSAnna Thomas // currently means re-using this SCEV outside the loop (see PR33706 for more 5376a1dd77fSAnna Thomas // details). 538f2ec16ccSHideki Saito if (PSE.getUnionPredicate().isAlwaysTrue()) { 539f2ec16ccSHideki Saito AllowedExit.insert(Phi); 540f2ec16ccSHideki Saito AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); 541f2ec16ccSHideki Saito } 542f2ec16ccSHideki Saito 543d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n"); 544f2ec16ccSHideki Saito } 545f2ec16ccSHideki Saito 546ea7f3035SHideki Saito bool LoopVectorizationLegality::setupOuterLoopInductions() { 547ea7f3035SHideki Saito BasicBlock *Header = TheLoop->getHeader(); 548ea7f3035SHideki Saito 549ea7f3035SHideki Saito // Returns true if a given Phi is a supported induction. 550ea7f3035SHideki Saito auto isSupportedPhi = [&](PHINode &Phi) -> bool { 551ea7f3035SHideki Saito InductionDescriptor ID; 552ea7f3035SHideki Saito if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) && 553ea7f3035SHideki Saito ID.getKind() == InductionDescriptor::IK_IntInduction) { 554ea7f3035SHideki Saito addInductionPhi(&Phi, ID, AllowedExit); 555ea7f3035SHideki Saito return true; 556ea7f3035SHideki Saito } else { 557ea7f3035SHideki Saito // Bail out for any Phi in the outer loop header that is not a supported 558ea7f3035SHideki Saito // induction. 559ea7f3035SHideki Saito LLVM_DEBUG( 560ea7f3035SHideki Saito dbgs() 561ea7f3035SHideki Saito << "LV: Found unsupported PHI for outer loop vectorization.\n"); 562ea7f3035SHideki Saito return false; 563ea7f3035SHideki Saito } 564ea7f3035SHideki Saito }; 565ea7f3035SHideki Saito 566ea7f3035SHideki Saito if (llvm::all_of(Header->phis(), isSupportedPhi)) 567ea7f3035SHideki Saito return true; 568ea7f3035SHideki Saito else 569ea7f3035SHideki Saito return false; 570ea7f3035SHideki Saito } 571ea7f3035SHideki Saito 57266c120f0SFrancesco Petrogalli /// Checks if a function is scalarizable according to the TLI, in 57366c120f0SFrancesco Petrogalli /// the sense that it should be vectorized and then expanded in 57466c120f0SFrancesco Petrogalli /// multiple scalarcalls. This is represented in the 57566c120f0SFrancesco Petrogalli /// TLI via mappings that do not specify a vector name, as in the 57666c120f0SFrancesco Petrogalli /// following example: 57766c120f0SFrancesco Petrogalli /// 57866c120f0SFrancesco Petrogalli /// const VecDesc VecIntrinsics[] = { 57966c120f0SFrancesco Petrogalli /// {"llvm.phx.abs.i32", "", 4} 58066c120f0SFrancesco Petrogalli /// }; 58166c120f0SFrancesco Petrogalli static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) { 58266c120f0SFrancesco Petrogalli const StringRef ScalarName = CI.getCalledFunction()->getName(); 58366c120f0SFrancesco Petrogalli bool Scalarize = TLI.isFunctionVectorizable(ScalarName); 58466c120f0SFrancesco Petrogalli // Check that all known VFs are not associated to a vector 58566c120f0SFrancesco Petrogalli // function, i.e. the vector name is emty. 58666c120f0SFrancesco Petrogalli if (Scalarize) 58766c120f0SFrancesco Petrogalli for (unsigned VF = 2, WidestVF = TLI.getWidestVF(ScalarName); 58866c120f0SFrancesco Petrogalli VF <= WidestVF; VF *= 2) { 58966c120f0SFrancesco Petrogalli Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF); 59066c120f0SFrancesco Petrogalli } 59166c120f0SFrancesco Petrogalli return Scalarize; 59266c120f0SFrancesco Petrogalli } 59366c120f0SFrancesco Petrogalli 594f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeInstrs() { 595f2ec16ccSHideki Saito BasicBlock *Header = TheLoop->getHeader(); 596f2ec16ccSHideki Saito 597f2ec16ccSHideki Saito // Look for the attribute signaling the absence of NaNs. 598f2ec16ccSHideki Saito Function &F = *Header->getParent(); 599f2ec16ccSHideki Saito HasFunNoNaNAttr = 600f2ec16ccSHideki Saito F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 601f2ec16ccSHideki Saito 602f2ec16ccSHideki Saito // For each block in the loop. 603f2ec16ccSHideki Saito for (BasicBlock *BB : TheLoop->blocks()) { 604f2ec16ccSHideki Saito // Scan the instructions in the block and look for hazards. 605f2ec16ccSHideki Saito for (Instruction &I : *BB) { 606f2ec16ccSHideki Saito if (auto *Phi = dyn_cast<PHINode>(&I)) { 607f2ec16ccSHideki Saito Type *PhiTy = Phi->getType(); 608f2ec16ccSHideki Saito // Check that this PHI type is allowed. 609f2ec16ccSHideki Saito if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && 610f2ec16ccSHideki Saito !PhiTy->isPointerTy()) { 6119e97caf5SRenato Golin reportVectorizationFailure("Found a non-int non-pointer PHI", 6129e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 613ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 614f2ec16ccSHideki Saito return false; 615f2ec16ccSHideki Saito } 616f2ec16ccSHideki Saito 617f2ec16ccSHideki Saito // If this PHINode is not in the header block, then we know that we 618f2ec16ccSHideki Saito // can convert it to select during if-conversion. No need to check if 619f2ec16ccSHideki Saito // the PHIs in this block are induction or reduction variables. 620f2ec16ccSHideki Saito if (BB != Header) { 62160a1e4ddSAnna Thomas // Non-header phi nodes that have outside uses can be vectorized. Add 62260a1e4ddSAnna Thomas // them to the list of allowed exits. 62360a1e4ddSAnna Thomas // Unsafe cyclic dependencies with header phis are identified during 62460a1e4ddSAnna Thomas // legalization for reduction, induction and first order 62560a1e4ddSAnna Thomas // recurrences. 626dd18ce45SBjorn Pettersson AllowedExit.insert(&I); 627f2ec16ccSHideki Saito continue; 628f2ec16ccSHideki Saito } 629f2ec16ccSHideki Saito 630f2ec16ccSHideki Saito // We only allow if-converted PHIs with exactly two incoming values. 631f2ec16ccSHideki Saito if (Phi->getNumIncomingValues() != 2) { 6329e97caf5SRenato Golin reportVectorizationFailure("Found an invalid PHI", 6339e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 634ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop, Phi); 635f2ec16ccSHideki Saito return false; 636f2ec16ccSHideki Saito } 637f2ec16ccSHideki Saito 638f2ec16ccSHideki Saito RecurrenceDescriptor RedDes; 639f2ec16ccSHideki Saito if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, 640f2ec16ccSHideki Saito DT)) { 641f2ec16ccSHideki Saito if (RedDes.hasUnsafeAlgebra()) 642f2ec16ccSHideki Saito Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); 643f2ec16ccSHideki Saito AllowedExit.insert(RedDes.getLoopExitInstr()); 644f2ec16ccSHideki Saito Reductions[Phi] = RedDes; 645f2ec16ccSHideki Saito continue; 646f2ec16ccSHideki Saito } 647f2ec16ccSHideki Saito 648b02b0ad8SAnna Thomas // TODO: Instead of recording the AllowedExit, it would be good to record the 649b02b0ad8SAnna Thomas // complementary set: NotAllowedExit. These include (but may not be 650b02b0ad8SAnna Thomas // limited to): 651b02b0ad8SAnna Thomas // 1. Reduction phis as they represent the one-before-last value, which 652b02b0ad8SAnna Thomas // is not available when vectorized 653b02b0ad8SAnna Thomas // 2. Induction phis and increment when SCEV predicates cannot be used 654b02b0ad8SAnna Thomas // outside the loop - see addInductionPhi 655b02b0ad8SAnna Thomas // 3. Non-Phis with outside uses when SCEV predicates cannot be used 656b02b0ad8SAnna Thomas // outside the loop - see call to hasOutsideLoopUser in the non-phi 657b02b0ad8SAnna Thomas // handling below 658b02b0ad8SAnna Thomas // 4. FirstOrderRecurrence phis that can possibly be handled by 659b02b0ad8SAnna Thomas // extraction. 660b02b0ad8SAnna Thomas // By recording these, we can then reason about ways to vectorize each 661b02b0ad8SAnna Thomas // of these NotAllowedExit. 662f2ec16ccSHideki Saito InductionDescriptor ID; 663f2ec16ccSHideki Saito if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { 664f2ec16ccSHideki Saito addInductionPhi(Phi, ID, AllowedExit); 665f2ec16ccSHideki Saito if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) 666f2ec16ccSHideki Saito Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); 667f2ec16ccSHideki Saito continue; 668f2ec16ccSHideki Saito } 669f2ec16ccSHideki Saito 670f2ec16ccSHideki Saito if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, 671f2ec16ccSHideki Saito SinkAfter, DT)) { 6728e0c5f72SAyal Zaks AllowedExit.insert(Phi); 673f2ec16ccSHideki Saito FirstOrderRecurrences.insert(Phi); 674f2ec16ccSHideki Saito continue; 675f2ec16ccSHideki Saito } 676f2ec16ccSHideki Saito 677f2ec16ccSHideki Saito // As a last resort, coerce the PHI to a AddRec expression 678f2ec16ccSHideki Saito // and re-try classifying it a an induction PHI. 679f2ec16ccSHideki Saito if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { 680f2ec16ccSHideki Saito addInductionPhi(Phi, ID, AllowedExit); 681f2ec16ccSHideki Saito continue; 682f2ec16ccSHideki Saito } 683f2ec16ccSHideki Saito 6849e97caf5SRenato Golin reportVectorizationFailure("Found an unidentified PHI", 6859e97caf5SRenato Golin "value that could not be identified as " 6869e97caf5SRenato Golin "reduction is used outside the loop", 687ec818d7fSHideki Saito "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); 688f2ec16ccSHideki Saito return false; 689f2ec16ccSHideki Saito } // end of PHI handling 690f2ec16ccSHideki Saito 691f2ec16ccSHideki Saito // We handle calls that: 692f2ec16ccSHideki Saito // * Are debug info intrinsics. 693f2ec16ccSHideki Saito // * Have a mapping to an IR intrinsic. 694f2ec16ccSHideki Saito // * Have a vector version available. 695f2ec16ccSHideki Saito auto *CI = dyn_cast<CallInst>(&I); 69666c120f0SFrancesco Petrogalli 697f2ec16ccSHideki Saito if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && 698f2ec16ccSHideki Saito !isa<DbgInfoIntrinsic>(CI) && 699f2ec16ccSHideki Saito !(CI->getCalledFunction() && TLI && 70066c120f0SFrancesco Petrogalli (!VFDatabase::getMappings(*CI).empty() || 70166c120f0SFrancesco Petrogalli isTLIScalarize(*TLI, *CI)))) { 7027d65fe5cSSanjay Patel // If the call is a recognized math libary call, it is likely that 7037d65fe5cSSanjay Patel // we can vectorize it given loosened floating-point constraints. 7047d65fe5cSSanjay Patel LibFunc Func; 7057d65fe5cSSanjay Patel bool IsMathLibCall = 7067d65fe5cSSanjay Patel TLI && CI->getCalledFunction() && 7077d65fe5cSSanjay Patel CI->getType()->isFloatingPointTy() && 7087d65fe5cSSanjay Patel TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && 7097d65fe5cSSanjay Patel TLI->hasOptimizedCodeGen(Func); 7107d65fe5cSSanjay Patel 7117d65fe5cSSanjay Patel if (IsMathLibCall) { 7127d65fe5cSSanjay Patel // TODO: Ideally, we should not use clang-specific language here, 7137d65fe5cSSanjay Patel // but it's hard to provide meaningful yet generic advice. 7147d65fe5cSSanjay Patel // Also, should this be guarded by allowExtraAnalysis() and/or be part 7157d65fe5cSSanjay Patel // of the returned info from isFunctionVectorizable()? 71666c120f0SFrancesco Petrogalli reportVectorizationFailure( 71766c120f0SFrancesco Petrogalli "Found a non-intrinsic callsite", 7189e97caf5SRenato Golin "library call cannot be vectorized. " 7197d65fe5cSSanjay Patel "Try compiling with -fno-math-errno, -ffast-math, " 7209e97caf5SRenato Golin "or similar flags", 721ec818d7fSHideki Saito "CantVectorizeLibcall", ORE, TheLoop, CI); 7227d65fe5cSSanjay Patel } else { 7239e97caf5SRenato Golin reportVectorizationFailure("Found a non-intrinsic callsite", 7249e97caf5SRenato Golin "call instruction cannot be vectorized", 725ec818d7fSHideki Saito "CantVectorizeLibcall", ORE, TheLoop, CI); 7267d65fe5cSSanjay Patel } 727f2ec16ccSHideki Saito return false; 728f2ec16ccSHideki Saito } 729f2ec16ccSHideki Saito 730a066f1f9SSimon Pilgrim // Some intrinsics have scalar arguments and should be same in order for 731a066f1f9SSimon Pilgrim // them to be vectorized (i.e. loop invariant). 732a066f1f9SSimon Pilgrim if (CI) { 733f2ec16ccSHideki Saito auto *SE = PSE.getSE(); 734a066f1f9SSimon Pilgrim Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI); 735a066f1f9SSimon Pilgrim for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) 736a066f1f9SSimon Pilgrim if (hasVectorInstrinsicScalarOpd(IntrinID, i)) { 737a066f1f9SSimon Pilgrim if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) { 7389e97caf5SRenato Golin reportVectorizationFailure("Found unvectorizable intrinsic", 7399e97caf5SRenato Golin "intrinsic instruction cannot be vectorized", 740ec818d7fSHideki Saito "CantVectorizeIntrinsic", ORE, TheLoop, CI); 741f2ec16ccSHideki Saito return false; 742f2ec16ccSHideki Saito } 743f2ec16ccSHideki Saito } 744a066f1f9SSimon Pilgrim } 745f2ec16ccSHideki Saito 746f2ec16ccSHideki Saito // Check that the instruction return type is vectorizable. 747f2ec16ccSHideki Saito // Also, we can't vectorize extractelement instructions. 748f2ec16ccSHideki Saito if ((!VectorType::isValidElementType(I.getType()) && 749f2ec16ccSHideki Saito !I.getType()->isVoidTy()) || 750f2ec16ccSHideki Saito isa<ExtractElementInst>(I)) { 7519e97caf5SRenato Golin reportVectorizationFailure("Found unvectorizable type", 7529e97caf5SRenato Golin "instruction return type cannot be vectorized", 753ec818d7fSHideki Saito "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); 754f2ec16ccSHideki Saito return false; 755f2ec16ccSHideki Saito } 756f2ec16ccSHideki Saito 757f2ec16ccSHideki Saito // Check that the stored type is vectorizable. 758f2ec16ccSHideki Saito if (auto *ST = dyn_cast<StoreInst>(&I)) { 759f2ec16ccSHideki Saito Type *T = ST->getValueOperand()->getType(); 760f2ec16ccSHideki Saito if (!VectorType::isValidElementType(T)) { 7619e97caf5SRenato Golin reportVectorizationFailure("Store instruction cannot be vectorized", 7629e97caf5SRenato Golin "store instruction cannot be vectorized", 763ec818d7fSHideki Saito "CantVectorizeStore", ORE, TheLoop, ST); 764f2ec16ccSHideki Saito return false; 765f2ec16ccSHideki Saito } 766f2ec16ccSHideki Saito 7676452bdd2SWarren Ristow // For nontemporal stores, check that a nontemporal vector version is 7686452bdd2SWarren Ristow // supported on the target. 7696452bdd2SWarren Ristow if (ST->getMetadata(LLVMContext::MD_nontemporal)) { 7706452bdd2SWarren Ristow // Arbitrarily try a vector of 2 elements. 771d2befc66SChristopher Tetreault auto *VecTy = FixedVectorType::get(T, /*NumElements=*/2); 7726452bdd2SWarren Ristow assert(VecTy && "did not find vectorized version of stored type"); 77352e98f62SNikita Popov if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) { 7746452bdd2SWarren Ristow reportVectorizationFailure( 7756452bdd2SWarren Ristow "nontemporal store instruction cannot be vectorized", 7766452bdd2SWarren Ristow "nontemporal store instruction cannot be vectorized", 777ec818d7fSHideki Saito "CantVectorizeNontemporalStore", ORE, TheLoop, ST); 7786452bdd2SWarren Ristow return false; 7796452bdd2SWarren Ristow } 7806452bdd2SWarren Ristow } 7816452bdd2SWarren Ristow 7826452bdd2SWarren Ristow } else if (auto *LD = dyn_cast<LoadInst>(&I)) { 7836452bdd2SWarren Ristow if (LD->getMetadata(LLVMContext::MD_nontemporal)) { 7846452bdd2SWarren Ristow // For nontemporal loads, check that a nontemporal vector version is 7856452bdd2SWarren Ristow // supported on the target (arbitrarily try a vector of 2 elements). 786d2befc66SChristopher Tetreault auto *VecTy = FixedVectorType::get(I.getType(), /*NumElements=*/2); 7876452bdd2SWarren Ristow assert(VecTy && "did not find vectorized version of load type"); 78852e98f62SNikita Popov if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) { 7896452bdd2SWarren Ristow reportVectorizationFailure( 7906452bdd2SWarren Ristow "nontemporal load instruction cannot be vectorized", 7916452bdd2SWarren Ristow "nontemporal load instruction cannot be vectorized", 792ec818d7fSHideki Saito "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); 7936452bdd2SWarren Ristow return false; 7946452bdd2SWarren Ristow } 7956452bdd2SWarren Ristow } 7966452bdd2SWarren Ristow 797f2ec16ccSHideki Saito // FP instructions can allow unsafe algebra, thus vectorizable by 798f2ec16ccSHideki Saito // non-IEEE-754 compliant SIMD units. 799f2ec16ccSHideki Saito // This applies to floating-point math operations and calls, not memory 800f2ec16ccSHideki Saito // operations, shuffles, or casts, as they don't change precision or 801f2ec16ccSHideki Saito // semantics. 802f2ec16ccSHideki Saito } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && 803f2ec16ccSHideki Saito !I.isFast()) { 804d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); 805f2ec16ccSHideki Saito Hints->setPotentiallyUnsafe(); 806f2ec16ccSHideki Saito } 807f2ec16ccSHideki Saito 808f2ec16ccSHideki Saito // Reduction instructions are allowed to have exit users. 809f2ec16ccSHideki Saito // All other instructions must not have external users. 810f2ec16ccSHideki Saito if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { 811b02b0ad8SAnna Thomas // We can safely vectorize loops where instructions within the loop are 812b02b0ad8SAnna Thomas // used outside the loop only if the SCEV predicates within the loop is 813b02b0ad8SAnna Thomas // same as outside the loop. Allowing the exit means reusing the SCEV 814b02b0ad8SAnna Thomas // outside the loop. 815b02b0ad8SAnna Thomas if (PSE.getUnionPredicate().isAlwaysTrue()) { 816b02b0ad8SAnna Thomas AllowedExit.insert(&I); 817b02b0ad8SAnna Thomas continue; 818b02b0ad8SAnna Thomas } 8199e97caf5SRenato Golin reportVectorizationFailure("Value cannot be used outside the loop", 8209e97caf5SRenato Golin "value cannot be used outside the loop", 821ec818d7fSHideki Saito "ValueUsedOutsideLoop", ORE, TheLoop, &I); 822f2ec16ccSHideki Saito return false; 823f2ec16ccSHideki Saito } 824f2ec16ccSHideki Saito } // next instr. 825f2ec16ccSHideki Saito } 826f2ec16ccSHideki Saito 827f2ec16ccSHideki Saito if (!PrimaryInduction) { 828f2ec16ccSHideki Saito if (Inductions.empty()) { 8299e97caf5SRenato Golin reportVectorizationFailure("Did not find one integer induction var", 8309e97caf5SRenato Golin "loop induction variable could not be identified", 831ec818d7fSHideki Saito "NoInductionVariable", ORE, TheLoop); 832f2ec16ccSHideki Saito return false; 8334f27730eSWarren Ristow } else if (!WidestIndTy) { 8349e97caf5SRenato Golin reportVectorizationFailure("Did not find one integer induction var", 8359e97caf5SRenato Golin "integer loop induction variable could not be identified", 836ec818d7fSHideki Saito "NoIntegerInductionVariable", ORE, TheLoop); 8374f27730eSWarren Ristow return false; 8389e97caf5SRenato Golin } else { 8399e97caf5SRenato Golin LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); 840f2ec16ccSHideki Saito } 841f2ec16ccSHideki Saito } 842f2ec16ccSHideki Saito 8439d24933fSFlorian Hahn // For first order recurrences, we use the previous value (incoming value from 8449d24933fSFlorian Hahn // the latch) to check if it dominates all users of the recurrence. Bail out 8459d24933fSFlorian Hahn // if we have to sink such an instruction for another recurrence, as the 8469d24933fSFlorian Hahn // dominance requirement may not hold after sinking. 8479d24933fSFlorian Hahn BasicBlock *LoopLatch = TheLoop->getLoopLatch(); 8489d24933fSFlorian Hahn if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) { 8499d24933fSFlorian Hahn Instruction *V = 8509d24933fSFlorian Hahn cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch)); 8519d24933fSFlorian Hahn return SinkAfter.find(V) != SinkAfter.end(); 8529d24933fSFlorian Hahn })) 8539d24933fSFlorian Hahn return false; 8549d24933fSFlorian Hahn 855f2ec16ccSHideki Saito // Now we know the widest induction type, check if our found induction 856f2ec16ccSHideki Saito // is the same size. If it's not, unset it here and InnerLoopVectorizer 857f2ec16ccSHideki Saito // will create another. 858f2ec16ccSHideki Saito if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) 859f2ec16ccSHideki Saito PrimaryInduction = nullptr; 860f2ec16ccSHideki Saito 861f2ec16ccSHideki Saito return true; 862f2ec16ccSHideki Saito } 863f2ec16ccSHideki Saito 864f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeMemory() { 865f2ec16ccSHideki Saito LAI = &(*GetLAA)(*TheLoop); 866f2ec16ccSHideki Saito const OptimizationRemarkAnalysis *LAR = LAI->getReport(); 867f2ec16ccSHideki Saito if (LAR) { 868f2ec16ccSHideki Saito ORE->emit([&]() { 869f2ec16ccSHideki Saito return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(), 870f2ec16ccSHideki Saito "loop not vectorized: ", *LAR); 871f2ec16ccSHideki Saito }); 872f2ec16ccSHideki Saito } 873f2ec16ccSHideki Saito if (!LAI->canVectorizeMemory()) 874f2ec16ccSHideki Saito return false; 875f2ec16ccSHideki Saito 8765e9215f0SAnna Thomas if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { 8779e97caf5SRenato Golin reportVectorizationFailure("Stores to a uniform address", 8789e97caf5SRenato Golin "write to a loop invariant address could not be vectorized", 879ec818d7fSHideki Saito "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); 880f2ec16ccSHideki Saito return false; 881f2ec16ccSHideki Saito } 882f2ec16ccSHideki Saito Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); 883f2ec16ccSHideki Saito PSE.addPredicate(LAI->getPSE().getUnionPredicate()); 884f2ec16ccSHideki Saito 885f2ec16ccSHideki Saito return true; 886f2ec16ccSHideki Saito } 887f2ec16ccSHideki Saito 888f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionPhi(const Value *V) { 889f2ec16ccSHideki Saito Value *In0 = const_cast<Value *>(V); 890f2ec16ccSHideki Saito PHINode *PN = dyn_cast_or_null<PHINode>(In0); 891f2ec16ccSHideki Saito if (!PN) 892f2ec16ccSHideki Saito return false; 893f2ec16ccSHideki Saito 894f2ec16ccSHideki Saito return Inductions.count(PN); 895f2ec16ccSHideki Saito } 896f2ec16ccSHideki Saito 897f2ec16ccSHideki Saito bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { 898f2ec16ccSHideki Saito auto *Inst = dyn_cast<Instruction>(V); 899f2ec16ccSHideki Saito return (Inst && InductionCastsToIgnore.count(Inst)); 900f2ec16ccSHideki Saito } 901f2ec16ccSHideki Saito 902f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionVariable(const Value *V) { 903f2ec16ccSHideki Saito return isInductionPhi(V) || isCastedInductionVariable(V); 904f2ec16ccSHideki Saito } 905f2ec16ccSHideki Saito 906f2ec16ccSHideki Saito bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { 907f2ec16ccSHideki Saito return FirstOrderRecurrences.count(Phi); 908f2ec16ccSHideki Saito } 909f2ec16ccSHideki Saito 910f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { 911f2ec16ccSHideki Saito return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); 912f2ec16ccSHideki Saito } 913f2ec16ccSHideki Saito 914f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockCanBePredicated( 915d57d73daSDorit Nuzman BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) { 916f2ec16ccSHideki Saito const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); 917f2ec16ccSHideki Saito 918f2ec16ccSHideki Saito for (Instruction &I : *BB) { 919f2ec16ccSHideki Saito // Check that we don't have a constant expression that can trap as operand. 920f2ec16ccSHideki Saito for (Value *Operand : I.operands()) { 921f2ec16ccSHideki Saito if (auto *C = dyn_cast<Constant>(Operand)) 922f2ec16ccSHideki Saito if (C->canTrap()) 923f2ec16ccSHideki Saito return false; 924f2ec16ccSHideki Saito } 92523c11380SFlorian Hahn 92623c11380SFlorian Hahn // We can predicate blocks with calls to assume, as long as we drop them in 92723c11380SFlorian Hahn // case we flatten the CFG via predication. 92823c11380SFlorian Hahn if (match(&I, m_Intrinsic<Intrinsic::assume>())) { 92923c11380SFlorian Hahn ConditionalAssumes.insert(&I); 93023c11380SFlorian Hahn continue; 93123c11380SFlorian Hahn } 93223c11380SFlorian Hahn 933f2ec16ccSHideki Saito // We might be able to hoist the load. 934f2ec16ccSHideki Saito if (I.mayReadFromMemory()) { 935f2ec16ccSHideki Saito auto *LI = dyn_cast<LoadInst>(&I); 936f2ec16ccSHideki Saito if (!LI) 937f2ec16ccSHideki Saito return false; 938f2ec16ccSHideki Saito if (!SafePtrs.count(LI->getPointerOperand())) { 939f2ec16ccSHideki Saito // !llvm.mem.parallel_loop_access implies if-conversion safety. 940f2ec16ccSHideki Saito // Otherwise, record that the load needs (real or emulated) masking 941f2ec16ccSHideki Saito // and let the cost model decide. 942d57d73daSDorit Nuzman if (!IsAnnotatedParallel || PreserveGuards) 943f2ec16ccSHideki Saito MaskedOp.insert(LI); 944f2ec16ccSHideki Saito continue; 945f2ec16ccSHideki Saito } 946f2ec16ccSHideki Saito } 947f2ec16ccSHideki Saito 948f2ec16ccSHideki Saito if (I.mayWriteToMemory()) { 949f2ec16ccSHideki Saito auto *SI = dyn_cast<StoreInst>(&I); 950f2ec16ccSHideki Saito if (!SI) 951f2ec16ccSHideki Saito return false; 952f2ec16ccSHideki Saito // Predicated store requires some form of masking: 953f2ec16ccSHideki Saito // 1) masked store HW instruction, 954f2ec16ccSHideki Saito // 2) emulation via load-blend-store (only if safe and legal to do so, 955f2ec16ccSHideki Saito // be aware on the race conditions), or 956f2ec16ccSHideki Saito // 3) element-by-element predicate check and scalar store. 957f2ec16ccSHideki Saito MaskedOp.insert(SI); 958f2ec16ccSHideki Saito continue; 959f2ec16ccSHideki Saito } 960f2ec16ccSHideki Saito if (I.mayThrow()) 961f2ec16ccSHideki Saito return false; 962f2ec16ccSHideki Saito } 963f2ec16ccSHideki Saito 964f2ec16ccSHideki Saito return true; 965f2ec16ccSHideki Saito } 966f2ec16ccSHideki Saito 967f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeWithIfConvert() { 968f2ec16ccSHideki Saito if (!EnableIfConversion) { 9699e97caf5SRenato Golin reportVectorizationFailure("If-conversion is disabled", 9709e97caf5SRenato Golin "if-conversion is disabled", 971ec818d7fSHideki Saito "IfConversionDisabled", 972ec818d7fSHideki Saito ORE, TheLoop); 973f2ec16ccSHideki Saito return false; 974f2ec16ccSHideki Saito } 975f2ec16ccSHideki Saito 976f2ec16ccSHideki Saito assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); 977f2ec16ccSHideki Saito 978cf3b5559SPhilip Reames // A list of pointers which are known to be dereferenceable within scope of 979cf3b5559SPhilip Reames // the loop body for each iteration of the loop which executes. That is, 980cf3b5559SPhilip Reames // the memory pointed to can be dereferenced (with the access size implied by 981cf3b5559SPhilip Reames // the value's type) unconditionally within the loop header without 982cf3b5559SPhilip Reames // introducing a new fault. 9833bbc71d6SSjoerd Meijer SmallPtrSet<Value *, 8> SafePointers; 984f2ec16ccSHideki Saito 985f2ec16ccSHideki Saito // Collect safe addresses. 986f2ec16ccSHideki Saito for (BasicBlock *BB : TheLoop->blocks()) { 9877403569bSPhilip Reames if (!blockNeedsPredication(BB)) { 988f2ec16ccSHideki Saito for (Instruction &I : *BB) 989f2ec16ccSHideki Saito if (auto *Ptr = getLoadStorePointerOperand(&I)) 9903bbc71d6SSjoerd Meijer SafePointers.insert(Ptr); 9917403569bSPhilip Reames continue; 9927403569bSPhilip Reames } 9937403569bSPhilip Reames 9947403569bSPhilip Reames // For a block which requires predication, a address may be safe to access 9957403569bSPhilip Reames // in the loop w/o predication if we can prove dereferenceability facts 9967403569bSPhilip Reames // sufficient to ensure it'll never fault within the loop. For the moment, 9977403569bSPhilip Reames // we restrict this to loads; stores are more complicated due to 9987403569bSPhilip Reames // concurrency restrictions. 9997403569bSPhilip Reames ScalarEvolution &SE = *PSE.getSE(); 10007403569bSPhilip Reames for (Instruction &I : *BB) { 10017403569bSPhilip Reames LoadInst *LI = dyn_cast<LoadInst>(&I); 10027403569bSPhilip Reames if (LI && !mustSuppressSpeculation(*LI) && 10037403569bSPhilip Reames isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT)) 10043bbc71d6SSjoerd Meijer SafePointers.insert(LI->getPointerOperand()); 10057403569bSPhilip Reames } 1006f2ec16ccSHideki Saito } 1007f2ec16ccSHideki Saito 1008f2ec16ccSHideki Saito // Collect the blocks that need predication. 1009f2ec16ccSHideki Saito BasicBlock *Header = TheLoop->getHeader(); 1010f2ec16ccSHideki Saito for (BasicBlock *BB : TheLoop->blocks()) { 1011f2ec16ccSHideki Saito // We don't support switch statements inside loops. 1012f2ec16ccSHideki Saito if (!isa<BranchInst>(BB->getTerminator())) { 10139e97caf5SRenato Golin reportVectorizationFailure("Loop contains a switch statement", 10149e97caf5SRenato Golin "loop contains a switch statement", 1015ec818d7fSHideki Saito "LoopContainsSwitch", ORE, TheLoop, 1016ec818d7fSHideki Saito BB->getTerminator()); 1017f2ec16ccSHideki Saito return false; 1018f2ec16ccSHideki Saito } 1019f2ec16ccSHideki Saito 1020f2ec16ccSHideki Saito // We must be able to predicate all blocks that need to be predicated. 1021f2ec16ccSHideki Saito if (blockNeedsPredication(BB)) { 10223bbc71d6SSjoerd Meijer if (!blockCanBePredicated(BB, SafePointers)) { 10239e97caf5SRenato Golin reportVectorizationFailure( 10249e97caf5SRenato Golin "Control flow cannot be substituted for a select", 10259e97caf5SRenato Golin "control flow cannot be substituted for a select", 1026ec818d7fSHideki Saito "NoCFGForSelect", ORE, TheLoop, 1027ec818d7fSHideki Saito BB->getTerminator()); 1028f2ec16ccSHideki Saito return false; 1029f2ec16ccSHideki Saito } 1030f2ec16ccSHideki Saito } else if (BB != Header && !canIfConvertPHINodes(BB)) { 10319e97caf5SRenato Golin reportVectorizationFailure( 10329e97caf5SRenato Golin "Control flow cannot be substituted for a select", 10339e97caf5SRenato Golin "control flow cannot be substituted for a select", 1034ec818d7fSHideki Saito "NoCFGForSelect", ORE, TheLoop, 1035ec818d7fSHideki Saito BB->getTerminator()); 1036f2ec16ccSHideki Saito return false; 1037f2ec16ccSHideki Saito } 1038f2ec16ccSHideki Saito } 1039f2ec16ccSHideki Saito 1040f2ec16ccSHideki Saito // We can if-convert this loop. 1041f2ec16ccSHideki Saito return true; 1042f2ec16ccSHideki Saito } 1043f2ec16ccSHideki Saito 1044f2ec16ccSHideki Saito // Helper function to canVectorizeLoopNestCFG. 1045f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, 1046f2ec16ccSHideki Saito bool UseVPlanNativePath) { 1047f2ec16ccSHideki Saito assert((UseVPlanNativePath || Lp->empty()) && 1048f2ec16ccSHideki Saito "VPlan-native path is not enabled."); 1049f2ec16ccSHideki Saito 1050f2ec16ccSHideki Saito // TODO: ORE should be improved to show more accurate information when an 1051f2ec16ccSHideki Saito // outer loop can't be vectorized because a nested loop is not understood or 1052f2ec16ccSHideki Saito // legal. Something like: "outer_loop_location: loop not vectorized: 1053f2ec16ccSHideki Saito // (inner_loop_location) loop control flow is not understood by vectorizer". 1054f2ec16ccSHideki Saito 1055f2ec16ccSHideki Saito // Store the result and return it at the end instead of exiting early, in case 1056f2ec16ccSHideki Saito // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1057f2ec16ccSHideki Saito bool Result = true; 1058f2ec16ccSHideki Saito bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1059f2ec16ccSHideki Saito 1060f2ec16ccSHideki Saito // We must have a loop in canonical form. Loops with indirectbr in them cannot 1061f2ec16ccSHideki Saito // be canonicalized. 1062f2ec16ccSHideki Saito if (!Lp->getLoopPreheader()) { 10639e97caf5SRenato Golin reportVectorizationFailure("Loop doesn't have a legal pre-header", 10649e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 1065ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 1066f2ec16ccSHideki Saito if (DoExtraAnalysis) 1067f2ec16ccSHideki Saito Result = false; 1068f2ec16ccSHideki Saito else 1069f2ec16ccSHideki Saito return false; 1070f2ec16ccSHideki Saito } 1071f2ec16ccSHideki Saito 1072f2ec16ccSHideki Saito // We must have a single backedge. 1073f2ec16ccSHideki Saito if (Lp->getNumBackEdges() != 1) { 10749e97caf5SRenato Golin reportVectorizationFailure("The loop must have a single backedge", 10759e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 1076ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 1077f2ec16ccSHideki Saito if (DoExtraAnalysis) 1078f2ec16ccSHideki Saito Result = false; 1079f2ec16ccSHideki Saito else 1080f2ec16ccSHideki Saito return false; 1081f2ec16ccSHideki Saito } 1082f2ec16ccSHideki Saito 1083f2ec16ccSHideki Saito // We must have a single exiting block. 1084f2ec16ccSHideki Saito if (!Lp->getExitingBlock()) { 10859e97caf5SRenato Golin reportVectorizationFailure("The loop must have an exiting block", 10869e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 1087ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 1088f2ec16ccSHideki Saito if (DoExtraAnalysis) 1089f2ec16ccSHideki Saito Result = false; 1090f2ec16ccSHideki Saito else 1091f2ec16ccSHideki Saito return false; 1092f2ec16ccSHideki Saito } 1093f2ec16ccSHideki Saito 1094f2ec16ccSHideki Saito // We only handle bottom-tested loops, i.e. loop in which the condition is 1095f2ec16ccSHideki Saito // checked at the end of each iteration. With that we can assume that all 1096f2ec16ccSHideki Saito // instructions in the loop are executed the same number of times. 1097f2ec16ccSHideki Saito if (Lp->getExitingBlock() != Lp->getLoopLatch()) { 10989e97caf5SRenato Golin reportVectorizationFailure("The exiting block is not the loop latch", 10999e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 1100ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 1101f2ec16ccSHideki Saito if (DoExtraAnalysis) 1102f2ec16ccSHideki Saito Result = false; 1103f2ec16ccSHideki Saito else 1104f2ec16ccSHideki Saito return false; 1105f2ec16ccSHideki Saito } 1106f2ec16ccSHideki Saito 1107f2ec16ccSHideki Saito return Result; 1108f2ec16ccSHideki Saito } 1109f2ec16ccSHideki Saito 1110f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopNestCFG( 1111f2ec16ccSHideki Saito Loop *Lp, bool UseVPlanNativePath) { 1112f2ec16ccSHideki Saito // Store the result and return it at the end instead of exiting early, in case 1113f2ec16ccSHideki Saito // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1114f2ec16ccSHideki Saito bool Result = true; 1115f2ec16ccSHideki Saito bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1116f2ec16ccSHideki Saito if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) { 1117f2ec16ccSHideki Saito if (DoExtraAnalysis) 1118f2ec16ccSHideki Saito Result = false; 1119f2ec16ccSHideki Saito else 1120f2ec16ccSHideki Saito return false; 1121f2ec16ccSHideki Saito } 1122f2ec16ccSHideki Saito 1123f2ec16ccSHideki Saito // Recursively check whether the loop control flow of nested loops is 1124f2ec16ccSHideki Saito // understood. 1125f2ec16ccSHideki Saito for (Loop *SubLp : *Lp) 1126f2ec16ccSHideki Saito if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) { 1127f2ec16ccSHideki Saito if (DoExtraAnalysis) 1128f2ec16ccSHideki Saito Result = false; 1129f2ec16ccSHideki Saito else 1130f2ec16ccSHideki Saito return false; 1131f2ec16ccSHideki Saito } 1132f2ec16ccSHideki Saito 1133f2ec16ccSHideki Saito return Result; 1134f2ec16ccSHideki Saito } 1135f2ec16ccSHideki Saito 1136f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { 1137f2ec16ccSHideki Saito // Store the result and return it at the end instead of exiting early, in case 1138f2ec16ccSHideki Saito // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1139f2ec16ccSHideki Saito bool Result = true; 1140f2ec16ccSHideki Saito 1141f2ec16ccSHideki Saito bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1142f2ec16ccSHideki Saito // Check whether the loop-related control flow in the loop nest is expected by 1143f2ec16ccSHideki Saito // vectorizer. 1144f2ec16ccSHideki Saito if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) { 1145f2ec16ccSHideki Saito if (DoExtraAnalysis) 1146f2ec16ccSHideki Saito Result = false; 1147f2ec16ccSHideki Saito else 1148f2ec16ccSHideki Saito return false; 1149f2ec16ccSHideki Saito } 1150f2ec16ccSHideki Saito 1151f2ec16ccSHideki Saito // We need to have a loop header. 1152d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() 1153f2ec16ccSHideki Saito << '\n'); 1154f2ec16ccSHideki Saito 1155f2ec16ccSHideki Saito // Specific checks for outer loops. We skip the remaining legal checks at this 1156f2ec16ccSHideki Saito // point because they don't support outer loops. 1157f2ec16ccSHideki Saito if (!TheLoop->empty()) { 1158f2ec16ccSHideki Saito assert(UseVPlanNativePath && "VPlan-native path is not enabled."); 1159f2ec16ccSHideki Saito 1160f2ec16ccSHideki Saito if (!canVectorizeOuterLoop()) { 11619e97caf5SRenato Golin reportVectorizationFailure("Unsupported outer loop", 11629e97caf5SRenato Golin "unsupported outer loop", 1163ec818d7fSHideki Saito "UnsupportedOuterLoop", 1164ec818d7fSHideki Saito ORE, TheLoop); 1165f2ec16ccSHideki Saito // TODO: Implement DoExtraAnalysis when subsequent legal checks support 1166f2ec16ccSHideki Saito // outer loops. 1167f2ec16ccSHideki Saito return false; 1168f2ec16ccSHideki Saito } 1169f2ec16ccSHideki Saito 1170d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n"); 1171f2ec16ccSHideki Saito return Result; 1172f2ec16ccSHideki Saito } 1173f2ec16ccSHideki Saito 1174f2ec16ccSHideki Saito assert(TheLoop->empty() && "Inner loop expected."); 1175f2ec16ccSHideki Saito // Check if we can if-convert non-single-bb loops. 1176f2ec16ccSHideki Saito unsigned NumBlocks = TheLoop->getNumBlocks(); 1177f2ec16ccSHideki Saito if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { 1178d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); 1179f2ec16ccSHideki Saito if (DoExtraAnalysis) 1180f2ec16ccSHideki Saito Result = false; 1181f2ec16ccSHideki Saito else 1182f2ec16ccSHideki Saito return false; 1183f2ec16ccSHideki Saito } 1184f2ec16ccSHideki Saito 1185f2ec16ccSHideki Saito // Check if we can vectorize the instructions and CFG in this loop. 1186f2ec16ccSHideki Saito if (!canVectorizeInstrs()) { 1187d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); 1188f2ec16ccSHideki Saito if (DoExtraAnalysis) 1189f2ec16ccSHideki Saito Result = false; 1190f2ec16ccSHideki Saito else 1191f2ec16ccSHideki Saito return false; 1192f2ec16ccSHideki Saito } 1193f2ec16ccSHideki Saito 1194f2ec16ccSHideki Saito // Go over each instruction and look at memory deps. 1195f2ec16ccSHideki Saito if (!canVectorizeMemory()) { 1196d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); 1197f2ec16ccSHideki Saito if (DoExtraAnalysis) 1198f2ec16ccSHideki Saito Result = false; 1199f2ec16ccSHideki Saito else 1200f2ec16ccSHideki Saito return false; 1201f2ec16ccSHideki Saito } 1202f2ec16ccSHideki Saito 1203d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" 1204f2ec16ccSHideki Saito << (LAI->getRuntimePointerChecking()->Need 1205f2ec16ccSHideki Saito ? " (with a runtime bound check)" 1206f2ec16ccSHideki Saito : "") 1207f2ec16ccSHideki Saito << "!\n"); 1208f2ec16ccSHideki Saito 1209f2ec16ccSHideki Saito unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; 1210f2ec16ccSHideki Saito if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) 1211f2ec16ccSHideki Saito SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; 1212f2ec16ccSHideki Saito 1213f2ec16ccSHideki Saito if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { 12149e97caf5SRenato Golin reportVectorizationFailure("Too many SCEV checks needed", 12159e97caf5SRenato Golin "Too many SCEV assumptions need to be made and checked at runtime", 1216ec818d7fSHideki Saito "TooManySCEVRunTimeChecks", ORE, TheLoop); 1217f2ec16ccSHideki Saito if (DoExtraAnalysis) 1218f2ec16ccSHideki Saito Result = false; 1219f2ec16ccSHideki Saito else 1220f2ec16ccSHideki Saito return false; 1221f2ec16ccSHideki Saito } 1222f2ec16ccSHideki Saito 1223f2ec16ccSHideki Saito // Okay! We've done all the tests. If any have failed, return false. Otherwise 1224f2ec16ccSHideki Saito // we can vectorize, and at this point we don't have any other mem analysis 1225f2ec16ccSHideki Saito // which may limit our maximum vectorization factor, so just return true with 1226f2ec16ccSHideki Saito // no restrictions. 1227f2ec16ccSHideki Saito return Result; 1228f2ec16ccSHideki Saito } 1229f2ec16ccSHideki Saito 1230d57d73daSDorit Nuzman bool LoopVectorizationLegality::prepareToFoldTailByMasking() { 1231b0b5312eSAyal Zaks 1232b0b5312eSAyal Zaks LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); 1233b0b5312eSAyal Zaks 1234d15df0edSAyal Zaks SmallPtrSet<const Value *, 8> ReductionLiveOuts; 1235b0b5312eSAyal Zaks 1236d0d38df0SDavid Green for (auto &Reduction : getReductionVars()) 1237d15df0edSAyal Zaks ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); 1238d15df0edSAyal Zaks 1239d15df0edSAyal Zaks // TODO: handle non-reduction outside users when tail is folded by masking. 1240b0b5312eSAyal Zaks for (auto *AE : AllowedExit) { 1241d15df0edSAyal Zaks // Check that all users of allowed exit values are inside the loop or 1242d15df0edSAyal Zaks // are the live-out of a reduction. 1243d15df0edSAyal Zaks if (ReductionLiveOuts.count(AE)) 1244d15df0edSAyal Zaks continue; 1245b0b5312eSAyal Zaks for (User *U : AE->users()) { 1246b0b5312eSAyal Zaks Instruction *UI = cast<Instruction>(U); 1247b0b5312eSAyal Zaks if (TheLoop->contains(UI)) 1248b0b5312eSAyal Zaks continue; 12499e97caf5SRenato Golin reportVectorizationFailure( 12509e97caf5SRenato Golin "Cannot fold tail by masking, loop has an outside user for", 12519e97caf5SRenato Golin "Cannot fold tail by masking in the presence of live outs.", 1252ec818d7fSHideki Saito "LiveOutFoldingTailByMasking", ORE, TheLoop, UI); 1253b0b5312eSAyal Zaks return false; 1254b0b5312eSAyal Zaks } 1255b0b5312eSAyal Zaks } 1256b0b5312eSAyal Zaks 1257b0b5312eSAyal Zaks // The list of pointers that we can safely read and write to remains empty. 1258b0b5312eSAyal Zaks SmallPtrSet<Value *, 8> SafePointers; 1259b0b5312eSAyal Zaks 1260b0b5312eSAyal Zaks // Check and mark all blocks for predication, including those that ordinarily 1261b0b5312eSAyal Zaks // do not need predication such as the header block. 1262b0b5312eSAyal Zaks for (BasicBlock *BB : TheLoop->blocks()) { 1263d57d73daSDorit Nuzman if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) { 12649e97caf5SRenato Golin reportVectorizationFailure( 12659e97caf5SRenato Golin "Cannot fold tail by masking as required", 12669e97caf5SRenato Golin "control flow cannot be substituted for a select", 1267ec818d7fSHideki Saito "NoCFGForSelect", ORE, TheLoop, 1268ec818d7fSHideki Saito BB->getTerminator()); 1269b0b5312eSAyal Zaks return false; 1270b0b5312eSAyal Zaks } 1271b0b5312eSAyal Zaks } 1272b0b5312eSAyal Zaks 1273b0b5312eSAyal Zaks LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n"); 1274b0b5312eSAyal Zaks return true; 1275b0b5312eSAyal Zaks } 1276b0b5312eSAyal Zaks 1277f2ec16ccSHideki Saito } // namespace llvm 1278