1f2ec16ccSHideki Saito //===- LoopVectorizationLegality.cpp --------------------------------------===// 2f2ec16ccSHideki Saito // 32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information. 52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6f2ec16ccSHideki Saito // 7f2ec16ccSHideki Saito //===----------------------------------------------------------------------===// 8f2ec16ccSHideki Saito // 9f2ec16ccSHideki Saito // This file provides loop vectorization legality analysis. Original code 10f2ec16ccSHideki Saito // resided in LoopVectorize.cpp for a long time. 11f2ec16ccSHideki Saito // 12f2ec16ccSHideki Saito // At this point, it is implemented as a utility class, not as an analysis 13f2ec16ccSHideki Saito // pass. It should be easy to create an analysis pass around it if there 14f2ec16ccSHideki Saito // is a need (but D45420 needs to happen first). 15f2ec16ccSHideki Saito // 16ec818d7fSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorize.h" 17f2ec16ccSHideki Saito #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h" 187403569bSPhilip Reames #include "llvm/Analysis/Loads.h" 197403569bSPhilip Reames #include "llvm/Analysis/ValueTracking.h" 20f2ec16ccSHideki Saito #include "llvm/Analysis/VectorUtils.h" 21f2ec16ccSHideki Saito #include "llvm/IR/IntrinsicInst.h" 22f2ec16ccSHideki Saito 23f2ec16ccSHideki Saito using namespace llvm; 24f2ec16ccSHideki Saito 25f2ec16ccSHideki Saito #define LV_NAME "loop-vectorize" 26f2ec16ccSHideki Saito #define DEBUG_TYPE LV_NAME 27f2ec16ccSHideki Saito 284e4ecae0SHideki Saito extern cl::opt<bool> EnableVPlanPredication; 294e4ecae0SHideki Saito 30f2ec16ccSHideki Saito static cl::opt<bool> 31f2ec16ccSHideki Saito EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden, 32f2ec16ccSHideki Saito cl::desc("Enable if-conversion during vectorization.")); 33f2ec16ccSHideki Saito 34f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeMemoryCheckThreshold( 35f2ec16ccSHideki Saito "pragma-vectorize-memory-check-threshold", cl::init(128), cl::Hidden, 36f2ec16ccSHideki Saito cl::desc("The maximum allowed number of runtime memory checks with a " 37f2ec16ccSHideki Saito "vectorize(enable) pragma.")); 38f2ec16ccSHideki Saito 39f2ec16ccSHideki Saito static cl::opt<unsigned> VectorizeSCEVCheckThreshold( 40f2ec16ccSHideki Saito "vectorize-scev-check-threshold", cl::init(16), cl::Hidden, 41f2ec16ccSHideki Saito cl::desc("The maximum number of SCEV checks allowed.")); 42f2ec16ccSHideki Saito 43f2ec16ccSHideki Saito static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold( 44f2ec16ccSHideki Saito "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden, 45f2ec16ccSHideki Saito cl::desc("The maximum number of SCEV checks allowed with a " 46f2ec16ccSHideki Saito "vectorize(enable) pragma")); 47f2ec16ccSHideki Saito 48f2ec16ccSHideki Saito /// Maximum vectorization interleave count. 49f2ec16ccSHideki Saito static const unsigned MaxInterleaveFactor = 16; 50f2ec16ccSHideki Saito 51f2ec16ccSHideki Saito namespace llvm { 52f2ec16ccSHideki Saito 53f2ec16ccSHideki Saito bool LoopVectorizeHints::Hint::validate(unsigned Val) { 54f2ec16ccSHideki Saito switch (Kind) { 55f2ec16ccSHideki Saito case HK_WIDTH: 56f2ec16ccSHideki Saito return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth; 57f2ec16ccSHideki Saito case HK_UNROLL: 58f2ec16ccSHideki Saito return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor; 59f2ec16ccSHideki Saito case HK_FORCE: 60f2ec16ccSHideki Saito return (Val <= 1); 61f2ec16ccSHideki Saito case HK_ISVECTORIZED: 6220b198ecSSjoerd Meijer case HK_PREDICATE: 63f2ec16ccSHideki Saito return (Val == 0 || Val == 1); 64f2ec16ccSHideki Saito } 65f2ec16ccSHideki Saito return false; 66f2ec16ccSHideki Saito } 67f2ec16ccSHideki Saito 68d4eb13c8SMichael Kruse LoopVectorizeHints::LoopVectorizeHints(const Loop *L, 69d4eb13c8SMichael Kruse bool InterleaveOnlyWhenForced, 70f2ec16ccSHideki Saito OptimizationRemarkEmitter &ORE) 71f2ec16ccSHideki Saito : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH), 72d4eb13c8SMichael Kruse Interleave("interleave.count", InterleaveOnlyWhenForced, HK_UNROLL), 73f2ec16ccSHideki Saito Force("vectorize.enable", FK_Undefined, HK_FORCE), 7420b198ecSSjoerd Meijer IsVectorized("isvectorized", 0, HK_ISVECTORIZED), 75cb47b878SSjoerd Meijer Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE), TheLoop(L), 7620b198ecSSjoerd Meijer ORE(ORE) { 77f2ec16ccSHideki Saito // Populate values with existing loop metadata. 78f2ec16ccSHideki Saito getHintsFromMetadata(); 79f2ec16ccSHideki Saito 80f2ec16ccSHideki Saito // force-vector-interleave overrides DisableInterleaving. 81f2ec16ccSHideki Saito if (VectorizerParams::isInterleaveForced()) 82f2ec16ccSHideki Saito Interleave.Value = VectorizerParams::VectorizationInterleave; 83f2ec16ccSHideki Saito 84f2ec16ccSHideki Saito if (IsVectorized.Value != 1) 85f2ec16ccSHideki Saito // If the vectorization width and interleaving count are both 1 then 86f2ec16ccSHideki Saito // consider the loop to have been already vectorized because there's 87f2ec16ccSHideki Saito // nothing more that we can do. 88f2ec16ccSHideki Saito IsVectorized.Value = Width.Value == 1 && Interleave.Value == 1; 89d4eb13c8SMichael Kruse LLVM_DEBUG(if (InterleaveOnlyWhenForced && Interleave.Value == 1) dbgs() 90f2ec16ccSHideki Saito << "LV: Interleaving disabled by the pass manager\n"); 91f2ec16ccSHideki Saito } 92f2ec16ccSHideki Saito 9377a614a6SMichael Kruse void LoopVectorizeHints::setAlreadyVectorized() { 9477a614a6SMichael Kruse LLVMContext &Context = TheLoop->getHeader()->getContext(); 9577a614a6SMichael Kruse 9677a614a6SMichael Kruse MDNode *IsVectorizedMD = MDNode::get( 9777a614a6SMichael Kruse Context, 9877a614a6SMichael Kruse {MDString::get(Context, "llvm.loop.isvectorized"), 9977a614a6SMichael Kruse ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))}); 10077a614a6SMichael Kruse MDNode *LoopID = TheLoop->getLoopID(); 10177a614a6SMichael Kruse MDNode *NewLoopID = 10277a614a6SMichael Kruse makePostTransformationMetadata(Context, LoopID, 10377a614a6SMichael Kruse {Twine(Prefix(), "vectorize.").str(), 10477a614a6SMichael Kruse Twine(Prefix(), "interleave.").str()}, 10577a614a6SMichael Kruse {IsVectorizedMD}); 10677a614a6SMichael Kruse TheLoop->setLoopID(NewLoopID); 10777a614a6SMichael Kruse 10877a614a6SMichael Kruse // Update internal cache. 10977a614a6SMichael Kruse IsVectorized.Value = 1; 11077a614a6SMichael Kruse } 11177a614a6SMichael Kruse 112d4eb13c8SMichael Kruse bool LoopVectorizeHints::allowVectorization( 113d4eb13c8SMichael Kruse Function *F, Loop *L, bool VectorizeOnlyWhenForced) const { 114f2ec16ccSHideki Saito if (getForce() == LoopVectorizeHints::FK_Disabled) { 115d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n"); 116f2ec16ccSHideki Saito emitRemarkWithHints(); 117f2ec16ccSHideki Saito return false; 118f2ec16ccSHideki Saito } 119f2ec16ccSHideki Saito 120d4eb13c8SMichael Kruse if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) { 121d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n"); 122f2ec16ccSHideki Saito emitRemarkWithHints(); 123f2ec16ccSHideki Saito return false; 124f2ec16ccSHideki Saito } 125f2ec16ccSHideki Saito 126f2ec16ccSHideki Saito if (getIsVectorized() == 1) { 127d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n"); 128f2ec16ccSHideki Saito // FIXME: Add interleave.disable metadata. This will allow 129f2ec16ccSHideki Saito // vectorize.disable to be used without disabling the pass and errors 130f2ec16ccSHideki Saito // to differentiate between disabled vectorization and a width of 1. 131f2ec16ccSHideki Saito ORE.emit([&]() { 132f2ec16ccSHideki Saito return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(), 133f2ec16ccSHideki Saito "AllDisabled", L->getStartLoc(), 134f2ec16ccSHideki Saito L->getHeader()) 135f2ec16ccSHideki Saito << "loop not vectorized: vectorization and interleaving are " 136f2ec16ccSHideki Saito "explicitly disabled, or the loop has already been " 137f2ec16ccSHideki Saito "vectorized"; 138f2ec16ccSHideki Saito }); 139f2ec16ccSHideki Saito return false; 140f2ec16ccSHideki Saito } 141f2ec16ccSHideki Saito 142f2ec16ccSHideki Saito return true; 143f2ec16ccSHideki Saito } 144f2ec16ccSHideki Saito 145f2ec16ccSHideki Saito void LoopVectorizeHints::emitRemarkWithHints() const { 146f2ec16ccSHideki Saito using namespace ore; 147f2ec16ccSHideki Saito 148f2ec16ccSHideki Saito ORE.emit([&]() { 149f2ec16ccSHideki Saito if (Force.Value == LoopVectorizeHints::FK_Disabled) 150f2ec16ccSHideki Saito return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled", 151f2ec16ccSHideki Saito TheLoop->getStartLoc(), 152f2ec16ccSHideki Saito TheLoop->getHeader()) 153f2ec16ccSHideki Saito << "loop not vectorized: vectorization is explicitly disabled"; 154f2ec16ccSHideki Saito else { 155f2ec16ccSHideki Saito OptimizationRemarkMissed R(LV_NAME, "MissedDetails", 156f2ec16ccSHideki Saito TheLoop->getStartLoc(), TheLoop->getHeader()); 157f2ec16ccSHideki Saito R << "loop not vectorized"; 158f2ec16ccSHideki Saito if (Force.Value == LoopVectorizeHints::FK_Enabled) { 159f2ec16ccSHideki Saito R << " (Force=" << NV("Force", true); 160f2ec16ccSHideki Saito if (Width.Value != 0) 161f2ec16ccSHideki Saito R << ", Vector Width=" << NV("VectorWidth", Width.Value); 162f2ec16ccSHideki Saito if (Interleave.Value != 0) 163f2ec16ccSHideki Saito R << ", Interleave Count=" << NV("InterleaveCount", Interleave.Value); 164f2ec16ccSHideki Saito R << ")"; 165f2ec16ccSHideki Saito } 166f2ec16ccSHideki Saito return R; 167f2ec16ccSHideki Saito } 168f2ec16ccSHideki Saito }); 169f2ec16ccSHideki Saito } 170f2ec16ccSHideki Saito 171f2ec16ccSHideki Saito const char *LoopVectorizeHints::vectorizeAnalysisPassName() const { 172f2ec16ccSHideki Saito if (getWidth() == 1) 173f2ec16ccSHideki Saito return LV_NAME; 174f2ec16ccSHideki Saito if (getForce() == LoopVectorizeHints::FK_Disabled) 175f2ec16ccSHideki Saito return LV_NAME; 176f2ec16ccSHideki Saito if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth() == 0) 177f2ec16ccSHideki Saito return LV_NAME; 178f2ec16ccSHideki Saito return OptimizationRemarkAnalysis::AlwaysPrint; 179f2ec16ccSHideki Saito } 180f2ec16ccSHideki Saito 181f2ec16ccSHideki Saito void LoopVectorizeHints::getHintsFromMetadata() { 182f2ec16ccSHideki Saito MDNode *LoopID = TheLoop->getLoopID(); 183f2ec16ccSHideki Saito if (!LoopID) 184f2ec16ccSHideki Saito return; 185f2ec16ccSHideki Saito 186f2ec16ccSHideki Saito // First operand should refer to the loop id itself. 187f2ec16ccSHideki Saito assert(LoopID->getNumOperands() > 0 && "requires at least one operand"); 188f2ec16ccSHideki Saito assert(LoopID->getOperand(0) == LoopID && "invalid loop id"); 189f2ec16ccSHideki Saito 190f2ec16ccSHideki Saito for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { 191f2ec16ccSHideki Saito const MDString *S = nullptr; 192f2ec16ccSHideki Saito SmallVector<Metadata *, 4> Args; 193f2ec16ccSHideki Saito 194f2ec16ccSHideki Saito // The expected hint is either a MDString or a MDNode with the first 195f2ec16ccSHideki Saito // operand a MDString. 196f2ec16ccSHideki Saito if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) { 197f2ec16ccSHideki Saito if (!MD || MD->getNumOperands() == 0) 198f2ec16ccSHideki Saito continue; 199f2ec16ccSHideki Saito S = dyn_cast<MDString>(MD->getOperand(0)); 200f2ec16ccSHideki Saito for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i) 201f2ec16ccSHideki Saito Args.push_back(MD->getOperand(i)); 202f2ec16ccSHideki Saito } else { 203f2ec16ccSHideki Saito S = dyn_cast<MDString>(LoopID->getOperand(i)); 204f2ec16ccSHideki Saito assert(Args.size() == 0 && "too many arguments for MDString"); 205f2ec16ccSHideki Saito } 206f2ec16ccSHideki Saito 207f2ec16ccSHideki Saito if (!S) 208f2ec16ccSHideki Saito continue; 209f2ec16ccSHideki Saito 210f2ec16ccSHideki Saito // Check if the hint starts with the loop metadata prefix. 211f2ec16ccSHideki Saito StringRef Name = S->getString(); 212f2ec16ccSHideki Saito if (Args.size() == 1) 213f2ec16ccSHideki Saito setHint(Name, Args[0]); 214f2ec16ccSHideki Saito } 215f2ec16ccSHideki Saito } 216f2ec16ccSHideki Saito 217f2ec16ccSHideki Saito void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) { 218f2ec16ccSHideki Saito if (!Name.startswith(Prefix())) 219f2ec16ccSHideki Saito return; 220f2ec16ccSHideki Saito Name = Name.substr(Prefix().size(), StringRef::npos); 221f2ec16ccSHideki Saito 222f2ec16ccSHideki Saito const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg); 223f2ec16ccSHideki Saito if (!C) 224f2ec16ccSHideki Saito return; 225f2ec16ccSHideki Saito unsigned Val = C->getZExtValue(); 226f2ec16ccSHideki Saito 22720b198ecSSjoerd Meijer Hint *Hints[] = {&Width, &Interleave, &Force, &IsVectorized, &Predicate}; 228f2ec16ccSHideki Saito for (auto H : Hints) { 229f2ec16ccSHideki Saito if (Name == H->Name) { 230f2ec16ccSHideki Saito if (H->validate(Val)) 231f2ec16ccSHideki Saito H->Value = Val; 232f2ec16ccSHideki Saito else 233d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n"); 234f2ec16ccSHideki Saito break; 235f2ec16ccSHideki Saito } 236f2ec16ccSHideki Saito } 237f2ec16ccSHideki Saito } 238f2ec16ccSHideki Saito 239f2ec16ccSHideki Saito bool LoopVectorizationRequirements::doesNotMeet( 240f2ec16ccSHideki Saito Function *F, Loop *L, const LoopVectorizeHints &Hints) { 241f2ec16ccSHideki Saito const char *PassName = Hints.vectorizeAnalysisPassName(); 242f2ec16ccSHideki Saito bool Failed = false; 243f2ec16ccSHideki Saito if (UnsafeAlgebraInst && !Hints.allowReordering()) { 244f2ec16ccSHideki Saito ORE.emit([&]() { 245f2ec16ccSHideki Saito return OptimizationRemarkAnalysisFPCommute( 246f2ec16ccSHideki Saito PassName, "CantReorderFPOps", UnsafeAlgebraInst->getDebugLoc(), 247f2ec16ccSHideki Saito UnsafeAlgebraInst->getParent()) 248f2ec16ccSHideki Saito << "loop not vectorized: cannot prove it is safe to reorder " 249f2ec16ccSHideki Saito "floating-point operations"; 250f2ec16ccSHideki Saito }); 251f2ec16ccSHideki Saito Failed = true; 252f2ec16ccSHideki Saito } 253f2ec16ccSHideki Saito 254f2ec16ccSHideki Saito // Test if runtime memcheck thresholds are exceeded. 255f2ec16ccSHideki Saito bool PragmaThresholdReached = 256f2ec16ccSHideki Saito NumRuntimePointerChecks > PragmaVectorizeMemoryCheckThreshold; 257f2ec16ccSHideki Saito bool ThresholdReached = 258f2ec16ccSHideki Saito NumRuntimePointerChecks > VectorizerParams::RuntimeMemoryCheckThreshold; 259f2ec16ccSHideki Saito if ((ThresholdReached && !Hints.allowReordering()) || 260f2ec16ccSHideki Saito PragmaThresholdReached) { 261f2ec16ccSHideki Saito ORE.emit([&]() { 262f2ec16ccSHideki Saito return OptimizationRemarkAnalysisAliasing(PassName, "CantReorderMemOps", 263f2ec16ccSHideki Saito L->getStartLoc(), 264f2ec16ccSHideki Saito L->getHeader()) 265f2ec16ccSHideki Saito << "loop not vectorized: cannot prove it is safe to reorder " 266f2ec16ccSHideki Saito "memory operations"; 267f2ec16ccSHideki Saito }); 268d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Too many memory checks needed.\n"); 269f2ec16ccSHideki Saito Failed = true; 270f2ec16ccSHideki Saito } 271f2ec16ccSHideki Saito 272f2ec16ccSHideki Saito return Failed; 273f2ec16ccSHideki Saito } 274f2ec16ccSHideki Saito 275f2ec16ccSHideki Saito // Return true if the inner loop \p Lp is uniform with regard to the outer loop 276f2ec16ccSHideki Saito // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes 277f2ec16ccSHideki Saito // executing the inner loop will execute the same iterations). This check is 278f2ec16ccSHideki Saito // very constrained for now but it will be relaxed in the future. \p Lp is 279f2ec16ccSHideki Saito // considered uniform if it meets all the following conditions: 280f2ec16ccSHideki Saito // 1) it has a canonical IV (starting from 0 and with stride 1), 281f2ec16ccSHideki Saito // 2) its latch terminator is a conditional branch and, 282f2ec16ccSHideki Saito // 3) its latch condition is a compare instruction whose operands are the 283f2ec16ccSHideki Saito // canonical IV and an OuterLp invariant. 284f2ec16ccSHideki Saito // This check doesn't take into account the uniformity of other conditions not 285f2ec16ccSHideki Saito // related to the loop latch because they don't affect the loop uniformity. 286f2ec16ccSHideki Saito // 287f2ec16ccSHideki Saito // NOTE: We decided to keep all these checks and its associated documentation 288f2ec16ccSHideki Saito // together so that we can easily have a picture of the current supported loop 289f2ec16ccSHideki Saito // nests. However, some of the current checks don't depend on \p OuterLp and 290f2ec16ccSHideki Saito // would be redundantly executed for each \p Lp if we invoked this function for 291f2ec16ccSHideki Saito // different candidate outer loops. This is not the case for now because we 292f2ec16ccSHideki Saito // don't currently have the infrastructure to evaluate multiple candidate outer 293f2ec16ccSHideki Saito // loops and \p OuterLp will be a fixed parameter while we only support explicit 294f2ec16ccSHideki Saito // outer loop vectorization. It's also very likely that these checks go away 295f2ec16ccSHideki Saito // before introducing the aforementioned infrastructure. However, if this is not 296f2ec16ccSHideki Saito // the case, we should move the \p OuterLp independent checks to a separate 297f2ec16ccSHideki Saito // function that is only executed once for each \p Lp. 298f2ec16ccSHideki Saito static bool isUniformLoop(Loop *Lp, Loop *OuterLp) { 299f2ec16ccSHideki Saito assert(Lp->getLoopLatch() && "Expected loop with a single latch."); 300f2ec16ccSHideki Saito 301f2ec16ccSHideki Saito // If Lp is the outer loop, it's uniform by definition. 302f2ec16ccSHideki Saito if (Lp == OuterLp) 303f2ec16ccSHideki Saito return true; 304f2ec16ccSHideki Saito assert(OuterLp->contains(Lp) && "OuterLp must contain Lp."); 305f2ec16ccSHideki Saito 306f2ec16ccSHideki Saito // 1. 307f2ec16ccSHideki Saito PHINode *IV = Lp->getCanonicalInductionVariable(); 308f2ec16ccSHideki Saito if (!IV) { 309d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n"); 310f2ec16ccSHideki Saito return false; 311f2ec16ccSHideki Saito } 312f2ec16ccSHideki Saito 313f2ec16ccSHideki Saito // 2. 314f2ec16ccSHideki Saito BasicBlock *Latch = Lp->getLoopLatch(); 315f2ec16ccSHideki Saito auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator()); 316f2ec16ccSHideki Saito if (!LatchBr || LatchBr->isUnconditional()) { 317d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n"); 318f2ec16ccSHideki Saito return false; 319f2ec16ccSHideki Saito } 320f2ec16ccSHideki Saito 321f2ec16ccSHideki Saito // 3. 322f2ec16ccSHideki Saito auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition()); 323f2ec16ccSHideki Saito if (!LatchCmp) { 324d34e60caSNicola Zaghen LLVM_DEBUG( 325d34e60caSNicola Zaghen dbgs() << "LV: Loop latch condition is not a compare instruction.\n"); 326f2ec16ccSHideki Saito return false; 327f2ec16ccSHideki Saito } 328f2ec16ccSHideki Saito 329f2ec16ccSHideki Saito Value *CondOp0 = LatchCmp->getOperand(0); 330f2ec16ccSHideki Saito Value *CondOp1 = LatchCmp->getOperand(1); 331f2ec16ccSHideki Saito Value *IVUpdate = IV->getIncomingValueForBlock(Latch); 332f2ec16ccSHideki Saito if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) && 333f2ec16ccSHideki Saito !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) { 334d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n"); 335f2ec16ccSHideki Saito return false; 336f2ec16ccSHideki Saito } 337f2ec16ccSHideki Saito 338f2ec16ccSHideki Saito return true; 339f2ec16ccSHideki Saito } 340f2ec16ccSHideki Saito 341f2ec16ccSHideki Saito // Return true if \p Lp and all its nested loops are uniform with regard to \p 342f2ec16ccSHideki Saito // OuterLp. 343f2ec16ccSHideki Saito static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) { 344f2ec16ccSHideki Saito if (!isUniformLoop(Lp, OuterLp)) 345f2ec16ccSHideki Saito return false; 346f2ec16ccSHideki Saito 347f2ec16ccSHideki Saito // Check if nested loops are uniform. 348f2ec16ccSHideki Saito for (Loop *SubLp : *Lp) 349f2ec16ccSHideki Saito if (!isUniformLoopNest(SubLp, OuterLp)) 350f2ec16ccSHideki Saito return false; 351f2ec16ccSHideki Saito 352f2ec16ccSHideki Saito return true; 353f2ec16ccSHideki Saito } 354f2ec16ccSHideki Saito 3555f8f34e4SAdrian Prantl /// Check whether it is safe to if-convert this phi node. 356f2ec16ccSHideki Saito /// 357f2ec16ccSHideki Saito /// Phi nodes with constant expressions that can trap are not safe to if 358f2ec16ccSHideki Saito /// convert. 359f2ec16ccSHideki Saito static bool canIfConvertPHINodes(BasicBlock *BB) { 360f2ec16ccSHideki Saito for (PHINode &Phi : BB->phis()) { 361f2ec16ccSHideki Saito for (Value *V : Phi.incoming_values()) 362f2ec16ccSHideki Saito if (auto *C = dyn_cast<Constant>(V)) 363f2ec16ccSHideki Saito if (C->canTrap()) 364f2ec16ccSHideki Saito return false; 365f2ec16ccSHideki Saito } 366f2ec16ccSHideki Saito return true; 367f2ec16ccSHideki Saito } 368f2ec16ccSHideki Saito 369f2ec16ccSHideki Saito static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) { 370f2ec16ccSHideki Saito if (Ty->isPointerTy()) 371f2ec16ccSHideki Saito return DL.getIntPtrType(Ty); 372f2ec16ccSHideki Saito 373f2ec16ccSHideki Saito // It is possible that char's or short's overflow when we ask for the loop's 374f2ec16ccSHideki Saito // trip count, work around this by changing the type size. 375f2ec16ccSHideki Saito if (Ty->getScalarSizeInBits() < 32) 376f2ec16ccSHideki Saito return Type::getInt32Ty(Ty->getContext()); 377f2ec16ccSHideki Saito 378f2ec16ccSHideki Saito return Ty; 379f2ec16ccSHideki Saito } 380f2ec16ccSHideki Saito 381f2ec16ccSHideki Saito static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) { 382f2ec16ccSHideki Saito Ty0 = convertPointerToIntegerType(DL, Ty0); 383f2ec16ccSHideki Saito Ty1 = convertPointerToIntegerType(DL, Ty1); 384f2ec16ccSHideki Saito if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits()) 385f2ec16ccSHideki Saito return Ty0; 386f2ec16ccSHideki Saito return Ty1; 387f2ec16ccSHideki Saito } 388f2ec16ccSHideki Saito 3895f8f34e4SAdrian Prantl /// Check that the instruction has outside loop users and is not an 390f2ec16ccSHideki Saito /// identified reduction variable. 391f2ec16ccSHideki Saito static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst, 392f2ec16ccSHideki Saito SmallPtrSetImpl<Value *> &AllowedExit) { 39360a1e4ddSAnna Thomas // Reductions, Inductions and non-header phis are allowed to have exit users. All 394f2ec16ccSHideki Saito // other instructions must not have external users. 395f2ec16ccSHideki Saito if (!AllowedExit.count(Inst)) 396f2ec16ccSHideki Saito // Check that all of the users of the loop are inside the BB. 397f2ec16ccSHideki Saito for (User *U : Inst->users()) { 398f2ec16ccSHideki Saito Instruction *UI = cast<Instruction>(U); 399f2ec16ccSHideki Saito // This user may be a reduction exit value. 400f2ec16ccSHideki Saito if (!TheLoop->contains(UI)) { 401d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n'); 402f2ec16ccSHideki Saito return true; 403f2ec16ccSHideki Saito } 404f2ec16ccSHideki Saito } 405f2ec16ccSHideki Saito return false; 406f2ec16ccSHideki Saito } 407f2ec16ccSHideki Saito 408f2ec16ccSHideki Saito int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { 409f2ec16ccSHideki Saito const ValueToValueMap &Strides = 410f2ec16ccSHideki Saito getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); 411f2ec16ccSHideki Saito 412d1170dbeSSjoerd Meijer bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize(); 413d1170dbeSSjoerd Meijer int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false); 414f2ec16ccSHideki Saito if (Stride == 1 || Stride == -1) 415f2ec16ccSHideki Saito return Stride; 416f2ec16ccSHideki Saito return 0; 417f2ec16ccSHideki Saito } 418f2ec16ccSHideki Saito 419f2ec16ccSHideki Saito bool LoopVectorizationLegality::isUniform(Value *V) { 420f2ec16ccSHideki Saito return LAI->isUniform(V); 421f2ec16ccSHideki Saito } 422f2ec16ccSHideki Saito 423f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeOuterLoop() { 424f2ec16ccSHideki Saito assert(!TheLoop->empty() && "We are not vectorizing an outer loop."); 425f2ec16ccSHideki Saito // Store the result and return it at the end instead of exiting early, in case 426f2ec16ccSHideki Saito // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 427f2ec16ccSHideki Saito bool Result = true; 428f2ec16ccSHideki Saito bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 429f2ec16ccSHideki Saito 430f2ec16ccSHideki Saito for (BasicBlock *BB : TheLoop->blocks()) { 431f2ec16ccSHideki Saito // Check whether the BB terminator is a BranchInst. Any other terminator is 432f2ec16ccSHideki Saito // not supported yet. 433f2ec16ccSHideki Saito auto *Br = dyn_cast<BranchInst>(BB->getTerminator()); 434f2ec16ccSHideki Saito if (!Br) { 4359e97caf5SRenato Golin reportVectorizationFailure("Unsupported basic block terminator", 4369e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 437ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 438f2ec16ccSHideki Saito if (DoExtraAnalysis) 439f2ec16ccSHideki Saito Result = false; 440f2ec16ccSHideki Saito else 441f2ec16ccSHideki Saito return false; 442f2ec16ccSHideki Saito } 443f2ec16ccSHideki Saito 444f2ec16ccSHideki Saito // Check whether the BranchInst is a supported one. Only unconditional 445f2ec16ccSHideki Saito // branches, conditional branches with an outer loop invariant condition or 446f2ec16ccSHideki Saito // backedges are supported. 4474e4ecae0SHideki Saito // FIXME: We skip these checks when VPlan predication is enabled as we 4484e4ecae0SHideki Saito // want to allow divergent branches. This whole check will be removed 4494e4ecae0SHideki Saito // once VPlan predication is on by default. 4504e4ecae0SHideki Saito if (!EnableVPlanPredication && Br && Br->isConditional() && 451f2ec16ccSHideki Saito !TheLoop->isLoopInvariant(Br->getCondition()) && 452f2ec16ccSHideki Saito !LI->isLoopHeader(Br->getSuccessor(0)) && 453f2ec16ccSHideki Saito !LI->isLoopHeader(Br->getSuccessor(1))) { 4549e97caf5SRenato Golin reportVectorizationFailure("Unsupported conditional branch", 4559e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 456ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 457f2ec16ccSHideki Saito if (DoExtraAnalysis) 458f2ec16ccSHideki Saito Result = false; 459f2ec16ccSHideki Saito else 460f2ec16ccSHideki Saito return false; 461f2ec16ccSHideki Saito } 462f2ec16ccSHideki Saito } 463f2ec16ccSHideki Saito 464f2ec16ccSHideki Saito // Check whether inner loops are uniform. At this point, we only support 465f2ec16ccSHideki Saito // simple outer loops scenarios with uniform nested loops. 466f2ec16ccSHideki Saito if (!isUniformLoopNest(TheLoop /*loop nest*/, 467f2ec16ccSHideki Saito TheLoop /*context outer loop*/)) { 4689e97caf5SRenato Golin reportVectorizationFailure("Outer loop contains divergent loops", 4699e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 470ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 471f2ec16ccSHideki Saito if (DoExtraAnalysis) 472f2ec16ccSHideki Saito Result = false; 473f2ec16ccSHideki Saito else 474f2ec16ccSHideki Saito return false; 475f2ec16ccSHideki Saito } 476f2ec16ccSHideki Saito 477ea7f3035SHideki Saito // Check whether we are able to set up outer loop induction. 478ea7f3035SHideki Saito if (!setupOuterLoopInductions()) { 4799e97caf5SRenato Golin reportVectorizationFailure("Unsupported outer loop Phi(s)", 4809e97caf5SRenato Golin "Unsupported outer loop Phi(s)", 481ec818d7fSHideki Saito "UnsupportedPhi", ORE, TheLoop); 482ea7f3035SHideki Saito if (DoExtraAnalysis) 483ea7f3035SHideki Saito Result = false; 484ea7f3035SHideki Saito else 485ea7f3035SHideki Saito return false; 486ea7f3035SHideki Saito } 487ea7f3035SHideki Saito 488f2ec16ccSHideki Saito return Result; 489f2ec16ccSHideki Saito } 490f2ec16ccSHideki Saito 491f2ec16ccSHideki Saito void LoopVectorizationLegality::addInductionPhi( 492f2ec16ccSHideki Saito PHINode *Phi, const InductionDescriptor &ID, 493f2ec16ccSHideki Saito SmallPtrSetImpl<Value *> &AllowedExit) { 494f2ec16ccSHideki Saito Inductions[Phi] = ID; 495f2ec16ccSHideki Saito 496f2ec16ccSHideki Saito // In case this induction also comes with casts that we know we can ignore 497f2ec16ccSHideki Saito // in the vectorized loop body, record them here. All casts could be recorded 498f2ec16ccSHideki Saito // here for ignoring, but suffices to record only the first (as it is the 499f2ec16ccSHideki Saito // only one that may bw used outside the cast sequence). 500f2ec16ccSHideki Saito const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts(); 501f2ec16ccSHideki Saito if (!Casts.empty()) 502f2ec16ccSHideki Saito InductionCastsToIgnore.insert(*Casts.begin()); 503f2ec16ccSHideki Saito 504f2ec16ccSHideki Saito Type *PhiTy = Phi->getType(); 505f2ec16ccSHideki Saito const DataLayout &DL = Phi->getModule()->getDataLayout(); 506f2ec16ccSHideki Saito 507f2ec16ccSHideki Saito // Get the widest type. 508f2ec16ccSHideki Saito if (!PhiTy->isFloatingPointTy()) { 509f2ec16ccSHideki Saito if (!WidestIndTy) 510f2ec16ccSHideki Saito WidestIndTy = convertPointerToIntegerType(DL, PhiTy); 511f2ec16ccSHideki Saito else 512f2ec16ccSHideki Saito WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy); 513f2ec16ccSHideki Saito } 514f2ec16ccSHideki Saito 515f2ec16ccSHideki Saito // Int inductions are special because we only allow one IV. 516f2ec16ccSHideki Saito if (ID.getKind() == InductionDescriptor::IK_IntInduction && 517f2ec16ccSHideki Saito ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() && 518f2ec16ccSHideki Saito isa<Constant>(ID.getStartValue()) && 519f2ec16ccSHideki Saito cast<Constant>(ID.getStartValue())->isNullValue()) { 520f2ec16ccSHideki Saito 521f2ec16ccSHideki Saito // Use the phi node with the widest type as induction. Use the last 522f2ec16ccSHideki Saito // one if there are multiple (no good reason for doing this other 523f2ec16ccSHideki Saito // than it is expedient). We've checked that it begins at zero and 524f2ec16ccSHideki Saito // steps by one, so this is a canonical induction variable. 525f2ec16ccSHideki Saito if (!PrimaryInduction || PhiTy == WidestIndTy) 526f2ec16ccSHideki Saito PrimaryInduction = Phi; 527f2ec16ccSHideki Saito } 528f2ec16ccSHideki Saito 529f2ec16ccSHideki Saito // Both the PHI node itself, and the "post-increment" value feeding 530f2ec16ccSHideki Saito // back into the PHI node may have external users. 531f2ec16ccSHideki Saito // We can allow those uses, except if the SCEVs we have for them rely 532f2ec16ccSHideki Saito // on predicates that only hold within the loop, since allowing the exit 5336a1dd77fSAnna Thomas // currently means re-using this SCEV outside the loop (see PR33706 for more 5346a1dd77fSAnna Thomas // details). 535f2ec16ccSHideki Saito if (PSE.getUnionPredicate().isAlwaysTrue()) { 536f2ec16ccSHideki Saito AllowedExit.insert(Phi); 537f2ec16ccSHideki Saito AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch())); 538f2ec16ccSHideki Saito } 539f2ec16ccSHideki Saito 540d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n"); 541f2ec16ccSHideki Saito } 542f2ec16ccSHideki Saito 543ea7f3035SHideki Saito bool LoopVectorizationLegality::setupOuterLoopInductions() { 544ea7f3035SHideki Saito BasicBlock *Header = TheLoop->getHeader(); 545ea7f3035SHideki Saito 546ea7f3035SHideki Saito // Returns true if a given Phi is a supported induction. 547ea7f3035SHideki Saito auto isSupportedPhi = [&](PHINode &Phi) -> bool { 548ea7f3035SHideki Saito InductionDescriptor ID; 549ea7f3035SHideki Saito if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) && 550ea7f3035SHideki Saito ID.getKind() == InductionDescriptor::IK_IntInduction) { 551ea7f3035SHideki Saito addInductionPhi(&Phi, ID, AllowedExit); 552ea7f3035SHideki Saito return true; 553ea7f3035SHideki Saito } else { 554ea7f3035SHideki Saito // Bail out for any Phi in the outer loop header that is not a supported 555ea7f3035SHideki Saito // induction. 556ea7f3035SHideki Saito LLVM_DEBUG( 557ea7f3035SHideki Saito dbgs() 558ea7f3035SHideki Saito << "LV: Found unsupported PHI for outer loop vectorization.\n"); 559ea7f3035SHideki Saito return false; 560ea7f3035SHideki Saito } 561ea7f3035SHideki Saito }; 562ea7f3035SHideki Saito 563ea7f3035SHideki Saito if (llvm::all_of(Header->phis(), isSupportedPhi)) 564ea7f3035SHideki Saito return true; 565ea7f3035SHideki Saito else 566ea7f3035SHideki Saito return false; 567ea7f3035SHideki Saito } 568ea7f3035SHideki Saito 569f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeInstrs() { 570f2ec16ccSHideki Saito BasicBlock *Header = TheLoop->getHeader(); 571f2ec16ccSHideki Saito 572f2ec16ccSHideki Saito // Look for the attribute signaling the absence of NaNs. 573f2ec16ccSHideki Saito Function &F = *Header->getParent(); 574f2ec16ccSHideki Saito HasFunNoNaNAttr = 575f2ec16ccSHideki Saito F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true"; 576f2ec16ccSHideki Saito 577f2ec16ccSHideki Saito // For each block in the loop. 578f2ec16ccSHideki Saito for (BasicBlock *BB : TheLoop->blocks()) { 579f2ec16ccSHideki Saito // Scan the instructions in the block and look for hazards. 580f2ec16ccSHideki Saito for (Instruction &I : *BB) { 581f2ec16ccSHideki Saito if (auto *Phi = dyn_cast<PHINode>(&I)) { 582f2ec16ccSHideki Saito Type *PhiTy = Phi->getType(); 583f2ec16ccSHideki Saito // Check that this PHI type is allowed. 584f2ec16ccSHideki Saito if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() && 585f2ec16ccSHideki Saito !PhiTy->isPointerTy()) { 5869e97caf5SRenato Golin reportVectorizationFailure("Found a non-int non-pointer PHI", 5879e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 588ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 589f2ec16ccSHideki Saito return false; 590f2ec16ccSHideki Saito } 591f2ec16ccSHideki Saito 592f2ec16ccSHideki Saito // If this PHINode is not in the header block, then we know that we 593f2ec16ccSHideki Saito // can convert it to select during if-conversion. No need to check if 594f2ec16ccSHideki Saito // the PHIs in this block are induction or reduction variables. 595f2ec16ccSHideki Saito if (BB != Header) { 59660a1e4ddSAnna Thomas // Non-header phi nodes that have outside uses can be vectorized. Add 59760a1e4ddSAnna Thomas // them to the list of allowed exits. 59860a1e4ddSAnna Thomas // Unsafe cyclic dependencies with header phis are identified during 59960a1e4ddSAnna Thomas // legalization for reduction, induction and first order 60060a1e4ddSAnna Thomas // recurrences. 601dd18ce45SBjorn Pettersson AllowedExit.insert(&I); 602f2ec16ccSHideki Saito continue; 603f2ec16ccSHideki Saito } 604f2ec16ccSHideki Saito 605f2ec16ccSHideki Saito // We only allow if-converted PHIs with exactly two incoming values. 606f2ec16ccSHideki Saito if (Phi->getNumIncomingValues() != 2) { 6079e97caf5SRenato Golin reportVectorizationFailure("Found an invalid PHI", 6089e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 609ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop, Phi); 610f2ec16ccSHideki Saito return false; 611f2ec16ccSHideki Saito } 612f2ec16ccSHideki Saito 613f2ec16ccSHideki Saito RecurrenceDescriptor RedDes; 614f2ec16ccSHideki Saito if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC, 615f2ec16ccSHideki Saito DT)) { 616f2ec16ccSHideki Saito if (RedDes.hasUnsafeAlgebra()) 617f2ec16ccSHideki Saito Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst()); 618f2ec16ccSHideki Saito AllowedExit.insert(RedDes.getLoopExitInstr()); 619f2ec16ccSHideki Saito Reductions[Phi] = RedDes; 620f2ec16ccSHideki Saito continue; 621f2ec16ccSHideki Saito } 622f2ec16ccSHideki Saito 623b02b0ad8SAnna Thomas // TODO: Instead of recording the AllowedExit, it would be good to record the 624b02b0ad8SAnna Thomas // complementary set: NotAllowedExit. These include (but may not be 625b02b0ad8SAnna Thomas // limited to): 626b02b0ad8SAnna Thomas // 1. Reduction phis as they represent the one-before-last value, which 627b02b0ad8SAnna Thomas // is not available when vectorized 628b02b0ad8SAnna Thomas // 2. Induction phis and increment when SCEV predicates cannot be used 629b02b0ad8SAnna Thomas // outside the loop - see addInductionPhi 630b02b0ad8SAnna Thomas // 3. Non-Phis with outside uses when SCEV predicates cannot be used 631b02b0ad8SAnna Thomas // outside the loop - see call to hasOutsideLoopUser in the non-phi 632b02b0ad8SAnna Thomas // handling below 633b02b0ad8SAnna Thomas // 4. FirstOrderRecurrence phis that can possibly be handled by 634b02b0ad8SAnna Thomas // extraction. 635b02b0ad8SAnna Thomas // By recording these, we can then reason about ways to vectorize each 636b02b0ad8SAnna Thomas // of these NotAllowedExit. 637f2ec16ccSHideki Saito InductionDescriptor ID; 638f2ec16ccSHideki Saito if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) { 639f2ec16ccSHideki Saito addInductionPhi(Phi, ID, AllowedExit); 640f2ec16ccSHideki Saito if (ID.hasUnsafeAlgebra() && !HasFunNoNaNAttr) 641f2ec16ccSHideki Saito Requirements->addUnsafeAlgebraInst(ID.getUnsafeAlgebraInst()); 642f2ec16ccSHideki Saito continue; 643f2ec16ccSHideki Saito } 644f2ec16ccSHideki Saito 645f2ec16ccSHideki Saito if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop, 646f2ec16ccSHideki Saito SinkAfter, DT)) { 647f2ec16ccSHideki Saito FirstOrderRecurrences.insert(Phi); 648f2ec16ccSHideki Saito continue; 649f2ec16ccSHideki Saito } 650f2ec16ccSHideki Saito 651f2ec16ccSHideki Saito // As a last resort, coerce the PHI to a AddRec expression 652f2ec16ccSHideki Saito // and re-try classifying it a an induction PHI. 653f2ec16ccSHideki Saito if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) { 654f2ec16ccSHideki Saito addInductionPhi(Phi, ID, AllowedExit); 655f2ec16ccSHideki Saito continue; 656f2ec16ccSHideki Saito } 657f2ec16ccSHideki Saito 6589e97caf5SRenato Golin reportVectorizationFailure("Found an unidentified PHI", 6599e97caf5SRenato Golin "value that could not be identified as " 6609e97caf5SRenato Golin "reduction is used outside the loop", 661ec818d7fSHideki Saito "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi); 662f2ec16ccSHideki Saito return false; 663f2ec16ccSHideki Saito } // end of PHI handling 664f2ec16ccSHideki Saito 665f2ec16ccSHideki Saito // We handle calls that: 666f2ec16ccSHideki Saito // * Are debug info intrinsics. 667f2ec16ccSHideki Saito // * Have a mapping to an IR intrinsic. 668f2ec16ccSHideki Saito // * Have a vector version available. 669f2ec16ccSHideki Saito auto *CI = dyn_cast<CallInst>(&I); 670f2ec16ccSHideki Saito if (CI && !getVectorIntrinsicIDForCall(CI, TLI) && 671f2ec16ccSHideki Saito !isa<DbgInfoIntrinsic>(CI) && 672f2ec16ccSHideki Saito !(CI->getCalledFunction() && TLI && 673f2ec16ccSHideki Saito TLI->isFunctionVectorizable(CI->getCalledFunction()->getName()))) { 6747d65fe5cSSanjay Patel // If the call is a recognized math libary call, it is likely that 6757d65fe5cSSanjay Patel // we can vectorize it given loosened floating-point constraints. 6767d65fe5cSSanjay Patel LibFunc Func; 6777d65fe5cSSanjay Patel bool IsMathLibCall = 6787d65fe5cSSanjay Patel TLI && CI->getCalledFunction() && 6797d65fe5cSSanjay Patel CI->getType()->isFloatingPointTy() && 6807d65fe5cSSanjay Patel TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) && 6817d65fe5cSSanjay Patel TLI->hasOptimizedCodeGen(Func); 6827d65fe5cSSanjay Patel 6837d65fe5cSSanjay Patel if (IsMathLibCall) { 6847d65fe5cSSanjay Patel // TODO: Ideally, we should not use clang-specific language here, 6857d65fe5cSSanjay Patel // but it's hard to provide meaningful yet generic advice. 6867d65fe5cSSanjay Patel // Also, should this be guarded by allowExtraAnalysis() and/or be part 6877d65fe5cSSanjay Patel // of the returned info from isFunctionVectorizable()? 6889e97caf5SRenato Golin reportVectorizationFailure("Found a non-intrinsic callsite", 6899e97caf5SRenato Golin "library call cannot be vectorized. " 6907d65fe5cSSanjay Patel "Try compiling with -fno-math-errno, -ffast-math, " 6919e97caf5SRenato Golin "or similar flags", 692ec818d7fSHideki Saito "CantVectorizeLibcall", ORE, TheLoop, CI); 6937d65fe5cSSanjay Patel } else { 6949e97caf5SRenato Golin reportVectorizationFailure("Found a non-intrinsic callsite", 6959e97caf5SRenato Golin "call instruction cannot be vectorized", 696ec818d7fSHideki Saito "CantVectorizeLibcall", ORE, TheLoop, CI); 6977d65fe5cSSanjay Patel } 698f2ec16ccSHideki Saito return false; 699f2ec16ccSHideki Saito } 700f2ec16ccSHideki Saito 701a066f1f9SSimon Pilgrim // Some intrinsics have scalar arguments and should be same in order for 702a066f1f9SSimon Pilgrim // them to be vectorized (i.e. loop invariant). 703a066f1f9SSimon Pilgrim if (CI) { 704f2ec16ccSHideki Saito auto *SE = PSE.getSE(); 705a066f1f9SSimon Pilgrim Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI); 706a066f1f9SSimon Pilgrim for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i) 707a066f1f9SSimon Pilgrim if (hasVectorInstrinsicScalarOpd(IntrinID, i)) { 708a066f1f9SSimon Pilgrim if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) { 7099e97caf5SRenato Golin reportVectorizationFailure("Found unvectorizable intrinsic", 7109e97caf5SRenato Golin "intrinsic instruction cannot be vectorized", 711ec818d7fSHideki Saito "CantVectorizeIntrinsic", ORE, TheLoop, CI); 712f2ec16ccSHideki Saito return false; 713f2ec16ccSHideki Saito } 714f2ec16ccSHideki Saito } 715a066f1f9SSimon Pilgrim } 716f2ec16ccSHideki Saito 717f2ec16ccSHideki Saito // Check that the instruction return type is vectorizable. 718f2ec16ccSHideki Saito // Also, we can't vectorize extractelement instructions. 719f2ec16ccSHideki Saito if ((!VectorType::isValidElementType(I.getType()) && 720f2ec16ccSHideki Saito !I.getType()->isVoidTy()) || 721f2ec16ccSHideki Saito isa<ExtractElementInst>(I)) { 7229e97caf5SRenato Golin reportVectorizationFailure("Found unvectorizable type", 7239e97caf5SRenato Golin "instruction return type cannot be vectorized", 724ec818d7fSHideki Saito "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); 725f2ec16ccSHideki Saito return false; 726f2ec16ccSHideki Saito } 727f2ec16ccSHideki Saito 728f2ec16ccSHideki Saito // Check that the stored type is vectorizable. 729f2ec16ccSHideki Saito if (auto *ST = dyn_cast<StoreInst>(&I)) { 730f2ec16ccSHideki Saito Type *T = ST->getValueOperand()->getType(); 731f2ec16ccSHideki Saito if (!VectorType::isValidElementType(T)) { 7329e97caf5SRenato Golin reportVectorizationFailure("Store instruction cannot be vectorized", 7339e97caf5SRenato Golin "store instruction cannot be vectorized", 734ec818d7fSHideki Saito "CantVectorizeStore", ORE, TheLoop, ST); 735f2ec16ccSHideki Saito return false; 736f2ec16ccSHideki Saito } 737f2ec16ccSHideki Saito 7386452bdd2SWarren Ristow // For nontemporal stores, check that a nontemporal vector version is 7396452bdd2SWarren Ristow // supported on the target. 7406452bdd2SWarren Ristow if (ST->getMetadata(LLVMContext::MD_nontemporal)) { 7416452bdd2SWarren Ristow // Arbitrarily try a vector of 2 elements. 7426452bdd2SWarren Ristow Type *VecTy = VectorType::get(T, /*NumElements=*/2); 7436452bdd2SWarren Ristow assert(VecTy && "did not find vectorized version of stored type"); 7445e1e83eeSGuillaume Chatelet const MaybeAlign Alignment = getLoadStoreAlignment(ST); 74533671cefSGuillaume Chatelet assert(Alignment && "Alignment should be set"); 7465e1e83eeSGuillaume Chatelet if (!TTI->isLegalNTStore(VecTy, *Alignment)) { 7476452bdd2SWarren Ristow reportVectorizationFailure( 7486452bdd2SWarren Ristow "nontemporal store instruction cannot be vectorized", 7496452bdd2SWarren Ristow "nontemporal store instruction cannot be vectorized", 750ec818d7fSHideki Saito "CantVectorizeNontemporalStore", ORE, TheLoop, ST); 7516452bdd2SWarren Ristow return false; 7526452bdd2SWarren Ristow } 7536452bdd2SWarren Ristow } 7546452bdd2SWarren Ristow 7556452bdd2SWarren Ristow } else if (auto *LD = dyn_cast<LoadInst>(&I)) { 7566452bdd2SWarren Ristow if (LD->getMetadata(LLVMContext::MD_nontemporal)) { 7576452bdd2SWarren Ristow // For nontemporal loads, check that a nontemporal vector version is 7586452bdd2SWarren Ristow // supported on the target (arbitrarily try a vector of 2 elements). 7596452bdd2SWarren Ristow Type *VecTy = VectorType::get(I.getType(), /*NumElements=*/2); 7606452bdd2SWarren Ristow assert(VecTy && "did not find vectorized version of load type"); 7615e1e83eeSGuillaume Chatelet const MaybeAlign Alignment = getLoadStoreAlignment(LD); 76233671cefSGuillaume Chatelet assert(Alignment && "Alignment should be set"); 7635e1e83eeSGuillaume Chatelet if (!TTI->isLegalNTLoad(VecTy, *Alignment)) { 7646452bdd2SWarren Ristow reportVectorizationFailure( 7656452bdd2SWarren Ristow "nontemporal load instruction cannot be vectorized", 7666452bdd2SWarren Ristow "nontemporal load instruction cannot be vectorized", 767ec818d7fSHideki Saito "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); 7686452bdd2SWarren Ristow return false; 7696452bdd2SWarren Ristow } 7706452bdd2SWarren Ristow } 7716452bdd2SWarren Ristow 772f2ec16ccSHideki Saito // FP instructions can allow unsafe algebra, thus vectorizable by 773f2ec16ccSHideki Saito // non-IEEE-754 compliant SIMD units. 774f2ec16ccSHideki Saito // This applies to floating-point math operations and calls, not memory 775f2ec16ccSHideki Saito // operations, shuffles, or casts, as they don't change precision or 776f2ec16ccSHideki Saito // semantics. 777f2ec16ccSHideki Saito } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) && 778f2ec16ccSHideki Saito !I.isFast()) { 779d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n"); 780f2ec16ccSHideki Saito Hints->setPotentiallyUnsafe(); 781f2ec16ccSHideki Saito } 782f2ec16ccSHideki Saito 783f2ec16ccSHideki Saito // Reduction instructions are allowed to have exit users. 784f2ec16ccSHideki Saito // All other instructions must not have external users. 785f2ec16ccSHideki Saito if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) { 786b02b0ad8SAnna Thomas // We can safely vectorize loops where instructions within the loop are 787b02b0ad8SAnna Thomas // used outside the loop only if the SCEV predicates within the loop is 788b02b0ad8SAnna Thomas // same as outside the loop. Allowing the exit means reusing the SCEV 789b02b0ad8SAnna Thomas // outside the loop. 790b02b0ad8SAnna Thomas if (PSE.getUnionPredicate().isAlwaysTrue()) { 791b02b0ad8SAnna Thomas AllowedExit.insert(&I); 792b02b0ad8SAnna Thomas continue; 793b02b0ad8SAnna Thomas } 7949e97caf5SRenato Golin reportVectorizationFailure("Value cannot be used outside the loop", 7959e97caf5SRenato Golin "value cannot be used outside the loop", 796ec818d7fSHideki Saito "ValueUsedOutsideLoop", ORE, TheLoop, &I); 797f2ec16ccSHideki Saito return false; 798f2ec16ccSHideki Saito } 799f2ec16ccSHideki Saito } // next instr. 800f2ec16ccSHideki Saito } 801f2ec16ccSHideki Saito 802f2ec16ccSHideki Saito if (!PrimaryInduction) { 803f2ec16ccSHideki Saito if (Inductions.empty()) { 8049e97caf5SRenato Golin reportVectorizationFailure("Did not find one integer induction var", 8059e97caf5SRenato Golin "loop induction variable could not be identified", 806ec818d7fSHideki Saito "NoInductionVariable", ORE, TheLoop); 807f2ec16ccSHideki Saito return false; 8084f27730eSWarren Ristow } else if (!WidestIndTy) { 8099e97caf5SRenato Golin reportVectorizationFailure("Did not find one integer induction var", 8109e97caf5SRenato Golin "integer loop induction variable could not be identified", 811ec818d7fSHideki Saito "NoIntegerInductionVariable", ORE, TheLoop); 8124f27730eSWarren Ristow return false; 8139e97caf5SRenato Golin } else { 8149e97caf5SRenato Golin LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n"); 815f2ec16ccSHideki Saito } 816f2ec16ccSHideki Saito } 817f2ec16ccSHideki Saito 818*9d24933fSFlorian Hahn // For first order recurrences, we use the previous value (incoming value from 819*9d24933fSFlorian Hahn // the latch) to check if it dominates all users of the recurrence. Bail out 820*9d24933fSFlorian Hahn // if we have to sink such an instruction for another recurrence, as the 821*9d24933fSFlorian Hahn // dominance requirement may not hold after sinking. 822*9d24933fSFlorian Hahn BasicBlock *LoopLatch = TheLoop->getLoopLatch(); 823*9d24933fSFlorian Hahn if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) { 824*9d24933fSFlorian Hahn Instruction *V = 825*9d24933fSFlorian Hahn cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch)); 826*9d24933fSFlorian Hahn return SinkAfter.find(V) != SinkAfter.end(); 827*9d24933fSFlorian Hahn })) 828*9d24933fSFlorian Hahn return false; 829*9d24933fSFlorian Hahn 830f2ec16ccSHideki Saito // Now we know the widest induction type, check if our found induction 831f2ec16ccSHideki Saito // is the same size. If it's not, unset it here and InnerLoopVectorizer 832f2ec16ccSHideki Saito // will create another. 833f2ec16ccSHideki Saito if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType()) 834f2ec16ccSHideki Saito PrimaryInduction = nullptr; 835f2ec16ccSHideki Saito 836f2ec16ccSHideki Saito return true; 837f2ec16ccSHideki Saito } 838f2ec16ccSHideki Saito 839f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeMemory() { 840f2ec16ccSHideki Saito LAI = &(*GetLAA)(*TheLoop); 841f2ec16ccSHideki Saito const OptimizationRemarkAnalysis *LAR = LAI->getReport(); 842f2ec16ccSHideki Saito if (LAR) { 843f2ec16ccSHideki Saito ORE->emit([&]() { 844f2ec16ccSHideki Saito return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(), 845f2ec16ccSHideki Saito "loop not vectorized: ", *LAR); 846f2ec16ccSHideki Saito }); 847f2ec16ccSHideki Saito } 848f2ec16ccSHideki Saito if (!LAI->canVectorizeMemory()) 849f2ec16ccSHideki Saito return false; 850f2ec16ccSHideki Saito 8515e9215f0SAnna Thomas if (LAI->hasDependenceInvolvingLoopInvariantAddress()) { 8529e97caf5SRenato Golin reportVectorizationFailure("Stores to a uniform address", 8539e97caf5SRenato Golin "write to a loop invariant address could not be vectorized", 854ec818d7fSHideki Saito "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); 855f2ec16ccSHideki Saito return false; 856f2ec16ccSHideki Saito } 857f2ec16ccSHideki Saito Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); 858f2ec16ccSHideki Saito PSE.addPredicate(LAI->getPSE().getUnionPredicate()); 859f2ec16ccSHideki Saito 860f2ec16ccSHideki Saito return true; 861f2ec16ccSHideki Saito } 862f2ec16ccSHideki Saito 863f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionPhi(const Value *V) { 864f2ec16ccSHideki Saito Value *In0 = const_cast<Value *>(V); 865f2ec16ccSHideki Saito PHINode *PN = dyn_cast_or_null<PHINode>(In0); 866f2ec16ccSHideki Saito if (!PN) 867f2ec16ccSHideki Saito return false; 868f2ec16ccSHideki Saito 869f2ec16ccSHideki Saito return Inductions.count(PN); 870f2ec16ccSHideki Saito } 871f2ec16ccSHideki Saito 872f2ec16ccSHideki Saito bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) { 873f2ec16ccSHideki Saito auto *Inst = dyn_cast<Instruction>(V); 874f2ec16ccSHideki Saito return (Inst && InductionCastsToIgnore.count(Inst)); 875f2ec16ccSHideki Saito } 876f2ec16ccSHideki Saito 877f2ec16ccSHideki Saito bool LoopVectorizationLegality::isInductionVariable(const Value *V) { 878f2ec16ccSHideki Saito return isInductionPhi(V) || isCastedInductionVariable(V); 879f2ec16ccSHideki Saito } 880f2ec16ccSHideki Saito 881f2ec16ccSHideki Saito bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) { 882f2ec16ccSHideki Saito return FirstOrderRecurrences.count(Phi); 883f2ec16ccSHideki Saito } 884f2ec16ccSHideki Saito 885f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) { 886f2ec16ccSHideki Saito return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT); 887f2ec16ccSHideki Saito } 888f2ec16ccSHideki Saito 889f2ec16ccSHideki Saito bool LoopVectorizationLegality::blockCanBePredicated( 890d57d73daSDorit Nuzman BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs, bool PreserveGuards) { 891f2ec16ccSHideki Saito const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); 892f2ec16ccSHideki Saito 893f2ec16ccSHideki Saito for (Instruction &I : *BB) { 894f2ec16ccSHideki Saito // Check that we don't have a constant expression that can trap as operand. 895f2ec16ccSHideki Saito for (Value *Operand : I.operands()) { 896f2ec16ccSHideki Saito if (auto *C = dyn_cast<Constant>(Operand)) 897f2ec16ccSHideki Saito if (C->canTrap()) 898f2ec16ccSHideki Saito return false; 899f2ec16ccSHideki Saito } 900f2ec16ccSHideki Saito // We might be able to hoist the load. 901f2ec16ccSHideki Saito if (I.mayReadFromMemory()) { 902f2ec16ccSHideki Saito auto *LI = dyn_cast<LoadInst>(&I); 903f2ec16ccSHideki Saito if (!LI) 904f2ec16ccSHideki Saito return false; 905f2ec16ccSHideki Saito if (!SafePtrs.count(LI->getPointerOperand())) { 906f2ec16ccSHideki Saito // !llvm.mem.parallel_loop_access implies if-conversion safety. 907f2ec16ccSHideki Saito // Otherwise, record that the load needs (real or emulated) masking 908f2ec16ccSHideki Saito // and let the cost model decide. 909d57d73daSDorit Nuzman if (!IsAnnotatedParallel || PreserveGuards) 910f2ec16ccSHideki Saito MaskedOp.insert(LI); 911f2ec16ccSHideki Saito continue; 912f2ec16ccSHideki Saito } 913f2ec16ccSHideki Saito } 914f2ec16ccSHideki Saito 915f2ec16ccSHideki Saito if (I.mayWriteToMemory()) { 916f2ec16ccSHideki Saito auto *SI = dyn_cast<StoreInst>(&I); 917f2ec16ccSHideki Saito if (!SI) 918f2ec16ccSHideki Saito return false; 919f2ec16ccSHideki Saito // Predicated store requires some form of masking: 920f2ec16ccSHideki Saito // 1) masked store HW instruction, 921f2ec16ccSHideki Saito // 2) emulation via load-blend-store (only if safe and legal to do so, 922f2ec16ccSHideki Saito // be aware on the race conditions), or 923f2ec16ccSHideki Saito // 3) element-by-element predicate check and scalar store. 924f2ec16ccSHideki Saito MaskedOp.insert(SI); 925f2ec16ccSHideki Saito continue; 926f2ec16ccSHideki Saito } 927f2ec16ccSHideki Saito if (I.mayThrow()) 928f2ec16ccSHideki Saito return false; 929f2ec16ccSHideki Saito } 930f2ec16ccSHideki Saito 931f2ec16ccSHideki Saito return true; 932f2ec16ccSHideki Saito } 933f2ec16ccSHideki Saito 934f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeWithIfConvert() { 935f2ec16ccSHideki Saito if (!EnableIfConversion) { 9369e97caf5SRenato Golin reportVectorizationFailure("If-conversion is disabled", 9379e97caf5SRenato Golin "if-conversion is disabled", 938ec818d7fSHideki Saito "IfConversionDisabled", 939ec818d7fSHideki Saito ORE, TheLoop); 940f2ec16ccSHideki Saito return false; 941f2ec16ccSHideki Saito } 942f2ec16ccSHideki Saito 943f2ec16ccSHideki Saito assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable"); 944f2ec16ccSHideki Saito 945cf3b5559SPhilip Reames // A list of pointers which are known to be dereferenceable within scope of 946cf3b5559SPhilip Reames // the loop body for each iteration of the loop which executes. That is, 947cf3b5559SPhilip Reames // the memory pointed to can be dereferenced (with the access size implied by 948cf3b5559SPhilip Reames // the value's type) unconditionally within the loop header without 949cf3b5559SPhilip Reames // introducing a new fault. 950f2ec16ccSHideki Saito SmallPtrSet<Value *, 8> SafePointes; 951f2ec16ccSHideki Saito 952f2ec16ccSHideki Saito // Collect safe addresses. 953f2ec16ccSHideki Saito for (BasicBlock *BB : TheLoop->blocks()) { 9547403569bSPhilip Reames if (!blockNeedsPredication(BB)) { 955f2ec16ccSHideki Saito for (Instruction &I : *BB) 956f2ec16ccSHideki Saito if (auto *Ptr = getLoadStorePointerOperand(&I)) 957f2ec16ccSHideki Saito SafePointes.insert(Ptr); 9587403569bSPhilip Reames continue; 9597403569bSPhilip Reames } 9607403569bSPhilip Reames 9617403569bSPhilip Reames // For a block which requires predication, a address may be safe to access 9627403569bSPhilip Reames // in the loop w/o predication if we can prove dereferenceability facts 9637403569bSPhilip Reames // sufficient to ensure it'll never fault within the loop. For the moment, 9647403569bSPhilip Reames // we restrict this to loads; stores are more complicated due to 9657403569bSPhilip Reames // concurrency restrictions. 9667403569bSPhilip Reames ScalarEvolution &SE = *PSE.getSE(); 9677403569bSPhilip Reames for (Instruction &I : *BB) { 9687403569bSPhilip Reames LoadInst *LI = dyn_cast<LoadInst>(&I); 9697403569bSPhilip Reames if (LI && !mustSuppressSpeculation(*LI) && 9707403569bSPhilip Reames isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT)) 9717403569bSPhilip Reames SafePointes.insert(LI->getPointerOperand()); 9727403569bSPhilip Reames } 973f2ec16ccSHideki Saito } 974f2ec16ccSHideki Saito 975f2ec16ccSHideki Saito // Collect the blocks that need predication. 976f2ec16ccSHideki Saito BasicBlock *Header = TheLoop->getHeader(); 977f2ec16ccSHideki Saito for (BasicBlock *BB : TheLoop->blocks()) { 978f2ec16ccSHideki Saito // We don't support switch statements inside loops. 979f2ec16ccSHideki Saito if (!isa<BranchInst>(BB->getTerminator())) { 9809e97caf5SRenato Golin reportVectorizationFailure("Loop contains a switch statement", 9819e97caf5SRenato Golin "loop contains a switch statement", 982ec818d7fSHideki Saito "LoopContainsSwitch", ORE, TheLoop, 983ec818d7fSHideki Saito BB->getTerminator()); 984f2ec16ccSHideki Saito return false; 985f2ec16ccSHideki Saito } 986f2ec16ccSHideki Saito 987f2ec16ccSHideki Saito // We must be able to predicate all blocks that need to be predicated. 988f2ec16ccSHideki Saito if (blockNeedsPredication(BB)) { 989f2ec16ccSHideki Saito if (!blockCanBePredicated(BB, SafePointes)) { 9909e97caf5SRenato Golin reportVectorizationFailure( 9919e97caf5SRenato Golin "Control flow cannot be substituted for a select", 9929e97caf5SRenato Golin "control flow cannot be substituted for a select", 993ec818d7fSHideki Saito "NoCFGForSelect", ORE, TheLoop, 994ec818d7fSHideki Saito BB->getTerminator()); 995f2ec16ccSHideki Saito return false; 996f2ec16ccSHideki Saito } 997f2ec16ccSHideki Saito } else if (BB != Header && !canIfConvertPHINodes(BB)) { 9989e97caf5SRenato Golin reportVectorizationFailure( 9999e97caf5SRenato Golin "Control flow cannot be substituted for a select", 10009e97caf5SRenato Golin "control flow cannot be substituted for a select", 1001ec818d7fSHideki Saito "NoCFGForSelect", ORE, TheLoop, 1002ec818d7fSHideki Saito BB->getTerminator()); 1003f2ec16ccSHideki Saito return false; 1004f2ec16ccSHideki Saito } 1005f2ec16ccSHideki Saito } 1006f2ec16ccSHideki Saito 1007f2ec16ccSHideki Saito // We can if-convert this loop. 1008f2ec16ccSHideki Saito return true; 1009f2ec16ccSHideki Saito } 1010f2ec16ccSHideki Saito 1011f2ec16ccSHideki Saito // Helper function to canVectorizeLoopNestCFG. 1012f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, 1013f2ec16ccSHideki Saito bool UseVPlanNativePath) { 1014f2ec16ccSHideki Saito assert((UseVPlanNativePath || Lp->empty()) && 1015f2ec16ccSHideki Saito "VPlan-native path is not enabled."); 1016f2ec16ccSHideki Saito 1017f2ec16ccSHideki Saito // TODO: ORE should be improved to show more accurate information when an 1018f2ec16ccSHideki Saito // outer loop can't be vectorized because a nested loop is not understood or 1019f2ec16ccSHideki Saito // legal. Something like: "outer_loop_location: loop not vectorized: 1020f2ec16ccSHideki Saito // (inner_loop_location) loop control flow is not understood by vectorizer". 1021f2ec16ccSHideki Saito 1022f2ec16ccSHideki Saito // Store the result and return it at the end instead of exiting early, in case 1023f2ec16ccSHideki Saito // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1024f2ec16ccSHideki Saito bool Result = true; 1025f2ec16ccSHideki Saito bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1026f2ec16ccSHideki Saito 1027f2ec16ccSHideki Saito // We must have a loop in canonical form. Loops with indirectbr in them cannot 1028f2ec16ccSHideki Saito // be canonicalized. 1029f2ec16ccSHideki Saito if (!Lp->getLoopPreheader()) { 10309e97caf5SRenato Golin reportVectorizationFailure("Loop doesn't have a legal pre-header", 10319e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 1032ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 1033f2ec16ccSHideki Saito if (DoExtraAnalysis) 1034f2ec16ccSHideki Saito Result = false; 1035f2ec16ccSHideki Saito else 1036f2ec16ccSHideki Saito return false; 1037f2ec16ccSHideki Saito } 1038f2ec16ccSHideki Saito 1039f2ec16ccSHideki Saito // We must have a single backedge. 1040f2ec16ccSHideki Saito if (Lp->getNumBackEdges() != 1) { 10419e97caf5SRenato Golin reportVectorizationFailure("The loop must have a single backedge", 10429e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 1043ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 1044f2ec16ccSHideki Saito if (DoExtraAnalysis) 1045f2ec16ccSHideki Saito Result = false; 1046f2ec16ccSHideki Saito else 1047f2ec16ccSHideki Saito return false; 1048f2ec16ccSHideki Saito } 1049f2ec16ccSHideki Saito 1050f2ec16ccSHideki Saito // We must have a single exiting block. 1051f2ec16ccSHideki Saito if (!Lp->getExitingBlock()) { 10529e97caf5SRenato Golin reportVectorizationFailure("The loop must have an exiting block", 10539e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 1054ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 1055f2ec16ccSHideki Saito if (DoExtraAnalysis) 1056f2ec16ccSHideki Saito Result = false; 1057f2ec16ccSHideki Saito else 1058f2ec16ccSHideki Saito return false; 1059f2ec16ccSHideki Saito } 1060f2ec16ccSHideki Saito 1061f2ec16ccSHideki Saito // We only handle bottom-tested loops, i.e. loop in which the condition is 1062f2ec16ccSHideki Saito // checked at the end of each iteration. With that we can assume that all 1063f2ec16ccSHideki Saito // instructions in the loop are executed the same number of times. 1064f2ec16ccSHideki Saito if (Lp->getExitingBlock() != Lp->getLoopLatch()) { 10659e97caf5SRenato Golin reportVectorizationFailure("The exiting block is not the loop latch", 10669e97caf5SRenato Golin "loop control flow is not understood by vectorizer", 1067ec818d7fSHideki Saito "CFGNotUnderstood", ORE, TheLoop); 1068f2ec16ccSHideki Saito if (DoExtraAnalysis) 1069f2ec16ccSHideki Saito Result = false; 1070f2ec16ccSHideki Saito else 1071f2ec16ccSHideki Saito return false; 1072f2ec16ccSHideki Saito } 1073f2ec16ccSHideki Saito 1074f2ec16ccSHideki Saito return Result; 1075f2ec16ccSHideki Saito } 1076f2ec16ccSHideki Saito 1077f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorizeLoopNestCFG( 1078f2ec16ccSHideki Saito Loop *Lp, bool UseVPlanNativePath) { 1079f2ec16ccSHideki Saito // Store the result and return it at the end instead of exiting early, in case 1080f2ec16ccSHideki Saito // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1081f2ec16ccSHideki Saito bool Result = true; 1082f2ec16ccSHideki Saito bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1083f2ec16ccSHideki Saito if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) { 1084f2ec16ccSHideki Saito if (DoExtraAnalysis) 1085f2ec16ccSHideki Saito Result = false; 1086f2ec16ccSHideki Saito else 1087f2ec16ccSHideki Saito return false; 1088f2ec16ccSHideki Saito } 1089f2ec16ccSHideki Saito 1090f2ec16ccSHideki Saito // Recursively check whether the loop control flow of nested loops is 1091f2ec16ccSHideki Saito // understood. 1092f2ec16ccSHideki Saito for (Loop *SubLp : *Lp) 1093f2ec16ccSHideki Saito if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) { 1094f2ec16ccSHideki Saito if (DoExtraAnalysis) 1095f2ec16ccSHideki Saito Result = false; 1096f2ec16ccSHideki Saito else 1097f2ec16ccSHideki Saito return false; 1098f2ec16ccSHideki Saito } 1099f2ec16ccSHideki Saito 1100f2ec16ccSHideki Saito return Result; 1101f2ec16ccSHideki Saito } 1102f2ec16ccSHideki Saito 1103f2ec16ccSHideki Saito bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { 1104f2ec16ccSHideki Saito // Store the result and return it at the end instead of exiting early, in case 1105f2ec16ccSHideki Saito // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 1106f2ec16ccSHideki Saito bool Result = true; 1107f2ec16ccSHideki Saito 1108f2ec16ccSHideki Saito bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 1109f2ec16ccSHideki Saito // Check whether the loop-related control flow in the loop nest is expected by 1110f2ec16ccSHideki Saito // vectorizer. 1111f2ec16ccSHideki Saito if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) { 1112f2ec16ccSHideki Saito if (DoExtraAnalysis) 1113f2ec16ccSHideki Saito Result = false; 1114f2ec16ccSHideki Saito else 1115f2ec16ccSHideki Saito return false; 1116f2ec16ccSHideki Saito } 1117f2ec16ccSHideki Saito 1118f2ec16ccSHideki Saito // We need to have a loop header. 1119d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() 1120f2ec16ccSHideki Saito << '\n'); 1121f2ec16ccSHideki Saito 1122f2ec16ccSHideki Saito // Specific checks for outer loops. We skip the remaining legal checks at this 1123f2ec16ccSHideki Saito // point because they don't support outer loops. 1124f2ec16ccSHideki Saito if (!TheLoop->empty()) { 1125f2ec16ccSHideki Saito assert(UseVPlanNativePath && "VPlan-native path is not enabled."); 1126f2ec16ccSHideki Saito 1127f2ec16ccSHideki Saito if (!canVectorizeOuterLoop()) { 11289e97caf5SRenato Golin reportVectorizationFailure("Unsupported outer loop", 11299e97caf5SRenato Golin "unsupported outer loop", 1130ec818d7fSHideki Saito "UnsupportedOuterLoop", 1131ec818d7fSHideki Saito ORE, TheLoop); 1132f2ec16ccSHideki Saito // TODO: Implement DoExtraAnalysis when subsequent legal checks support 1133f2ec16ccSHideki Saito // outer loops. 1134f2ec16ccSHideki Saito return false; 1135f2ec16ccSHideki Saito } 1136f2ec16ccSHideki Saito 1137d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n"); 1138f2ec16ccSHideki Saito return Result; 1139f2ec16ccSHideki Saito } 1140f2ec16ccSHideki Saito 1141f2ec16ccSHideki Saito assert(TheLoop->empty() && "Inner loop expected."); 1142f2ec16ccSHideki Saito // Check if we can if-convert non-single-bb loops. 1143f2ec16ccSHideki Saito unsigned NumBlocks = TheLoop->getNumBlocks(); 1144f2ec16ccSHideki Saito if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { 1145d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); 1146f2ec16ccSHideki Saito if (DoExtraAnalysis) 1147f2ec16ccSHideki Saito Result = false; 1148f2ec16ccSHideki Saito else 1149f2ec16ccSHideki Saito return false; 1150f2ec16ccSHideki Saito } 1151f2ec16ccSHideki Saito 1152f2ec16ccSHideki Saito // Check if we can vectorize the instructions and CFG in this loop. 1153f2ec16ccSHideki Saito if (!canVectorizeInstrs()) { 1154d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); 1155f2ec16ccSHideki Saito if (DoExtraAnalysis) 1156f2ec16ccSHideki Saito Result = false; 1157f2ec16ccSHideki Saito else 1158f2ec16ccSHideki Saito return false; 1159f2ec16ccSHideki Saito } 1160f2ec16ccSHideki Saito 1161f2ec16ccSHideki Saito // Go over each instruction and look at memory deps. 1162f2ec16ccSHideki Saito if (!canVectorizeMemory()) { 1163d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); 1164f2ec16ccSHideki Saito if (DoExtraAnalysis) 1165f2ec16ccSHideki Saito Result = false; 1166f2ec16ccSHideki Saito else 1167f2ec16ccSHideki Saito return false; 1168f2ec16ccSHideki Saito } 1169f2ec16ccSHideki Saito 1170d34e60caSNicola Zaghen LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" 1171f2ec16ccSHideki Saito << (LAI->getRuntimePointerChecking()->Need 1172f2ec16ccSHideki Saito ? " (with a runtime bound check)" 1173f2ec16ccSHideki Saito : "") 1174f2ec16ccSHideki Saito << "!\n"); 1175f2ec16ccSHideki Saito 1176f2ec16ccSHideki Saito unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; 1177f2ec16ccSHideki Saito if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) 1178f2ec16ccSHideki Saito SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; 1179f2ec16ccSHideki Saito 1180f2ec16ccSHideki Saito if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { 11819e97caf5SRenato Golin reportVectorizationFailure("Too many SCEV checks needed", 11829e97caf5SRenato Golin "Too many SCEV assumptions need to be made and checked at runtime", 1183ec818d7fSHideki Saito "TooManySCEVRunTimeChecks", ORE, TheLoop); 1184f2ec16ccSHideki Saito if (DoExtraAnalysis) 1185f2ec16ccSHideki Saito Result = false; 1186f2ec16ccSHideki Saito else 1187f2ec16ccSHideki Saito return false; 1188f2ec16ccSHideki Saito } 1189f2ec16ccSHideki Saito 1190f2ec16ccSHideki Saito // Okay! We've done all the tests. If any have failed, return false. Otherwise 1191f2ec16ccSHideki Saito // we can vectorize, and at this point we don't have any other mem analysis 1192f2ec16ccSHideki Saito // which may limit our maximum vectorization factor, so just return true with 1193f2ec16ccSHideki Saito // no restrictions. 1194f2ec16ccSHideki Saito return Result; 1195f2ec16ccSHideki Saito } 1196f2ec16ccSHideki Saito 1197d57d73daSDorit Nuzman bool LoopVectorizationLegality::prepareToFoldTailByMasking() { 1198b0b5312eSAyal Zaks 1199b0b5312eSAyal Zaks LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); 1200b0b5312eSAyal Zaks 1201b0b5312eSAyal Zaks if (!PrimaryInduction) { 12029e97caf5SRenato Golin reportVectorizationFailure( 12039e97caf5SRenato Golin "No primary induction, cannot fold tail by masking", 12049e97caf5SRenato Golin "Missing a primary induction variable in the loop, which is " 12059e97caf5SRenato Golin "needed in order to fold tail by masking as required.", 1206ec818d7fSHideki Saito "NoPrimaryInduction", ORE, TheLoop); 1207b0b5312eSAyal Zaks return false; 1208b0b5312eSAyal Zaks } 1209b0b5312eSAyal Zaks 1210d15df0edSAyal Zaks SmallPtrSet<const Value *, 8> ReductionLiveOuts; 1211b0b5312eSAyal Zaks 1212d15df0edSAyal Zaks for (auto &Reduction : *getReductionVars()) 1213d15df0edSAyal Zaks ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); 1214d15df0edSAyal Zaks 1215d15df0edSAyal Zaks // TODO: handle non-reduction outside users when tail is folded by masking. 1216b0b5312eSAyal Zaks for (auto *AE : AllowedExit) { 1217d15df0edSAyal Zaks // Check that all users of allowed exit values are inside the loop or 1218d15df0edSAyal Zaks // are the live-out of a reduction. 1219d15df0edSAyal Zaks if (ReductionLiveOuts.count(AE)) 1220d15df0edSAyal Zaks continue; 1221b0b5312eSAyal Zaks for (User *U : AE->users()) { 1222b0b5312eSAyal Zaks Instruction *UI = cast<Instruction>(U); 1223b0b5312eSAyal Zaks if (TheLoop->contains(UI)) 1224b0b5312eSAyal Zaks continue; 12259e97caf5SRenato Golin reportVectorizationFailure( 12269e97caf5SRenato Golin "Cannot fold tail by masking, loop has an outside user for", 12279e97caf5SRenato Golin "Cannot fold tail by masking in the presence of live outs.", 1228ec818d7fSHideki Saito "LiveOutFoldingTailByMasking", ORE, TheLoop, UI); 1229b0b5312eSAyal Zaks return false; 1230b0b5312eSAyal Zaks } 1231b0b5312eSAyal Zaks } 1232b0b5312eSAyal Zaks 1233b0b5312eSAyal Zaks // The list of pointers that we can safely read and write to remains empty. 1234b0b5312eSAyal Zaks SmallPtrSet<Value *, 8> SafePointers; 1235b0b5312eSAyal Zaks 1236b0b5312eSAyal Zaks // Check and mark all blocks for predication, including those that ordinarily 1237b0b5312eSAyal Zaks // do not need predication such as the header block. 1238b0b5312eSAyal Zaks for (BasicBlock *BB : TheLoop->blocks()) { 1239d57d73daSDorit Nuzman if (!blockCanBePredicated(BB, SafePointers, /* MaskAllLoads= */ true)) { 12409e97caf5SRenato Golin reportVectorizationFailure( 12419e97caf5SRenato Golin "Cannot fold tail by masking as required", 12429e97caf5SRenato Golin "control flow cannot be substituted for a select", 1243ec818d7fSHideki Saito "NoCFGForSelect", ORE, TheLoop, 1244ec818d7fSHideki Saito BB->getTerminator()); 1245b0b5312eSAyal Zaks return false; 1246b0b5312eSAyal Zaks } 1247b0b5312eSAyal Zaks } 1248b0b5312eSAyal Zaks 1249b0b5312eSAyal Zaks LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n"); 1250b0b5312eSAyal Zaks return true; 1251b0b5312eSAyal Zaks } 1252b0b5312eSAyal Zaks 1253f2ec16ccSHideki Saito } // namespace llvm 1254