10b57cec5SDimitry Andric //===- LoopVectorizationLegality.cpp --------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file provides loop vectorization legality analysis. Original code
100b57cec5SDimitry Andric // resided in LoopVectorize.cpp for a long time.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric // At this point, it is implemented as a utility class, not as an analysis
130b57cec5SDimitry Andric // pass. It should be easy to create an analysis pass around it if there
140b57cec5SDimitry Andric // is a need (but D45420 needs to happen first).
150b57cec5SDimitry Andric //
16af732203SDimitry Andric
170b57cec5SDimitry Andric #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
188bcb0991SDimitry Andric #include "llvm/Analysis/Loads.h"
195ffd83dbSDimitry Andric #include "llvm/Analysis/LoopInfo.h"
20af732203SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
218bcb0991SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
220b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
230b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
245ffd83dbSDimitry Andric #include "llvm/IR/PatternMatch.h"
25af732203SDimitry Andric #include "llvm/Transforms/Utils/SizeOpts.h"
265ffd83dbSDimitry Andric #include "llvm/Transforms/Vectorize/LoopVectorize.h"
270b57cec5SDimitry Andric
280b57cec5SDimitry Andric using namespace llvm;
295ffd83dbSDimitry Andric using namespace PatternMatch;
300b57cec5SDimitry Andric
310b57cec5SDimitry Andric #define LV_NAME "loop-vectorize"
320b57cec5SDimitry Andric #define DEBUG_TYPE LV_NAME
330b57cec5SDimitry Andric
340b57cec5SDimitry Andric extern cl::opt<bool> EnableVPlanPredication;
350b57cec5SDimitry Andric
360b57cec5SDimitry Andric static cl::opt<bool>
370b57cec5SDimitry Andric EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
380b57cec5SDimitry Andric cl::desc("Enable if-conversion during vectorization."));
390b57cec5SDimitry Andric
40*5f7ddb14SDimitry Andric namespace llvm {
41*5f7ddb14SDimitry Andric cl::opt<bool>
42*5f7ddb14SDimitry Andric HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
43*5f7ddb14SDimitry Andric cl::desc("Allow enabling loop hints to reorder "
44*5f7ddb14SDimitry Andric "FP operations during vectorization."));
45*5f7ddb14SDimitry Andric }
460b57cec5SDimitry Andric
47*5f7ddb14SDimitry Andric // TODO: Move size-based thresholds out of legality checking, make cost based
48*5f7ddb14SDimitry Andric // decisions instead of hard thresholds.
490b57cec5SDimitry Andric static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
500b57cec5SDimitry Andric "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
510b57cec5SDimitry Andric cl::desc("The maximum number of SCEV checks allowed."));
520b57cec5SDimitry Andric
530b57cec5SDimitry Andric static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
540b57cec5SDimitry Andric "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
550b57cec5SDimitry Andric cl::desc("The maximum number of SCEV checks allowed with a "
560b57cec5SDimitry Andric "vectorize(enable) pragma"));
570b57cec5SDimitry Andric
58*5f7ddb14SDimitry Andric // FIXME: When scalable vectorization is stable enough, change the default
59*5f7ddb14SDimitry Andric // to SK_PreferFixedWidth.
60*5f7ddb14SDimitry Andric static cl::opt<LoopVectorizeHints::ScalableForceKind> ScalableVectorization(
61*5f7ddb14SDimitry Andric "scalable-vectorization", cl::init(LoopVectorizeHints::SK_FixedWidthOnly),
62*5f7ddb14SDimitry Andric cl::Hidden,
63*5f7ddb14SDimitry Andric cl::desc("Control whether the compiler can use scalable vectors to "
64*5f7ddb14SDimitry Andric "vectorize a loop"),
65*5f7ddb14SDimitry Andric cl::values(
66*5f7ddb14SDimitry Andric clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
67*5f7ddb14SDimitry Andric "Scalable vectorization is disabled."),
68*5f7ddb14SDimitry Andric clEnumValN(LoopVectorizeHints::SK_PreferFixedWidth, "on",
69*5f7ddb14SDimitry Andric "Scalable vectorization is available, but favor fixed-width "
70*5f7ddb14SDimitry Andric "vectorization when the cost is inconclusive."),
71*5f7ddb14SDimitry Andric clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
72*5f7ddb14SDimitry Andric "Scalable vectorization is available and favored when the "
73*5f7ddb14SDimitry Andric "cost is inconclusive.")));
74*5f7ddb14SDimitry Andric
750b57cec5SDimitry Andric /// Maximum vectorization interleave count.
760b57cec5SDimitry Andric static const unsigned MaxInterleaveFactor = 16;
770b57cec5SDimitry Andric
780b57cec5SDimitry Andric namespace llvm {
790b57cec5SDimitry Andric
validate(unsigned Val)800b57cec5SDimitry Andric bool LoopVectorizeHints::Hint::validate(unsigned Val) {
810b57cec5SDimitry Andric switch (Kind) {
820b57cec5SDimitry Andric case HK_WIDTH:
830b57cec5SDimitry Andric return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
84*5f7ddb14SDimitry Andric case HK_INTERLEAVE:
850b57cec5SDimitry Andric return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
860b57cec5SDimitry Andric case HK_FORCE:
870b57cec5SDimitry Andric return (Val <= 1);
880b57cec5SDimitry Andric case HK_ISVECTORIZED:
898bcb0991SDimitry Andric case HK_PREDICATE:
90af732203SDimitry Andric case HK_SCALABLE:
910b57cec5SDimitry Andric return (Val == 0 || Val == 1);
920b57cec5SDimitry Andric }
930b57cec5SDimitry Andric return false;
940b57cec5SDimitry Andric }
950b57cec5SDimitry Andric
LoopVectorizeHints(const Loop * L,bool InterleaveOnlyWhenForced,OptimizationRemarkEmitter & ORE)960b57cec5SDimitry Andric LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
970b57cec5SDimitry Andric bool InterleaveOnlyWhenForced,
980b57cec5SDimitry Andric OptimizationRemarkEmitter &ORE)
990b57cec5SDimitry Andric : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
100*5f7ddb14SDimitry Andric Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
1010b57cec5SDimitry Andric Force("vectorize.enable", FK_Undefined, HK_FORCE),
1028bcb0991SDimitry Andric IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
103af732203SDimitry Andric Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
104*5f7ddb14SDimitry Andric Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
105*5f7ddb14SDimitry Andric TheLoop(L), ORE(ORE) {
1060b57cec5SDimitry Andric // Populate values with existing loop metadata.
1070b57cec5SDimitry Andric getHintsFromMetadata();
1080b57cec5SDimitry Andric
1090b57cec5SDimitry Andric // force-vector-interleave overrides DisableInterleaving.
1100b57cec5SDimitry Andric if (VectorizerParams::isInterleaveForced())
1110b57cec5SDimitry Andric Interleave.Value = VectorizerParams::VectorizationInterleave;
1120b57cec5SDimitry Andric
113*5f7ddb14SDimitry Andric if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
114*5f7ddb14SDimitry Andric // If the width is set, but the metadata says nothing about the scalable
115*5f7ddb14SDimitry Andric // property, then assume it concerns only a fixed-width UserVF.
116*5f7ddb14SDimitry Andric // If width is not set, the flag takes precedence.
117*5f7ddb14SDimitry Andric Scalable.Value = Width.Value ? SK_FixedWidthOnly : ScalableVectorization;
118*5f7ddb14SDimitry Andric else if (ScalableVectorization == SK_FixedWidthOnly)
119*5f7ddb14SDimitry Andric // If the flag is set to disable any use of scalable vectors, override the
120*5f7ddb14SDimitry Andric // loop hint.
121*5f7ddb14SDimitry Andric Scalable.Value = SK_FixedWidthOnly;
122*5f7ddb14SDimitry Andric
1230b57cec5SDimitry Andric if (IsVectorized.Value != 1)
1240b57cec5SDimitry Andric // If the vectorization width and interleaving count are both 1 then
1250b57cec5SDimitry Andric // consider the loop to have been already vectorized because there's
1260b57cec5SDimitry Andric // nothing more that we can do.
127af732203SDimitry Andric IsVectorized.Value =
128*5f7ddb14SDimitry Andric getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
129*5f7ddb14SDimitry Andric LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
1300b57cec5SDimitry Andric << "LV: Interleaving disabled by the pass manager\n");
1310b57cec5SDimitry Andric }
1320b57cec5SDimitry Andric
setAlreadyVectorized()1330b57cec5SDimitry Andric void LoopVectorizeHints::setAlreadyVectorized() {
1340b57cec5SDimitry Andric LLVMContext &Context = TheLoop->getHeader()->getContext();
1350b57cec5SDimitry Andric
1360b57cec5SDimitry Andric MDNode *IsVectorizedMD = MDNode::get(
1370b57cec5SDimitry Andric Context,
1380b57cec5SDimitry Andric {MDString::get(Context, "llvm.loop.isvectorized"),
1390b57cec5SDimitry Andric ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
1400b57cec5SDimitry Andric MDNode *LoopID = TheLoop->getLoopID();
1410b57cec5SDimitry Andric MDNode *NewLoopID =
1420b57cec5SDimitry Andric makePostTransformationMetadata(Context, LoopID,
1430b57cec5SDimitry Andric {Twine(Prefix(), "vectorize.").str(),
1440b57cec5SDimitry Andric Twine(Prefix(), "interleave.").str()},
1450b57cec5SDimitry Andric {IsVectorizedMD});
1460b57cec5SDimitry Andric TheLoop->setLoopID(NewLoopID);
1470b57cec5SDimitry Andric
1480b57cec5SDimitry Andric // Update internal cache.
1490b57cec5SDimitry Andric IsVectorized.Value = 1;
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric
allowVectorization(Function * F,Loop * L,bool VectorizeOnlyWhenForced) const1520b57cec5SDimitry Andric bool LoopVectorizeHints::allowVectorization(
1530b57cec5SDimitry Andric Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
1540b57cec5SDimitry Andric if (getForce() == LoopVectorizeHints::FK_Disabled) {
1550b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
1560b57cec5SDimitry Andric emitRemarkWithHints();
1570b57cec5SDimitry Andric return false;
1580b57cec5SDimitry Andric }
1590b57cec5SDimitry Andric
1600b57cec5SDimitry Andric if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
1610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
1620b57cec5SDimitry Andric emitRemarkWithHints();
1630b57cec5SDimitry Andric return false;
1640b57cec5SDimitry Andric }
1650b57cec5SDimitry Andric
1660b57cec5SDimitry Andric if (getIsVectorized() == 1) {
1670b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
1680b57cec5SDimitry Andric // FIXME: Add interleave.disable metadata. This will allow
1690b57cec5SDimitry Andric // vectorize.disable to be used without disabling the pass and errors
1700b57cec5SDimitry Andric // to differentiate between disabled vectorization and a width of 1.
1710b57cec5SDimitry Andric ORE.emit([&]() {
1720b57cec5SDimitry Andric return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
1730b57cec5SDimitry Andric "AllDisabled", L->getStartLoc(),
1740b57cec5SDimitry Andric L->getHeader())
1750b57cec5SDimitry Andric << "loop not vectorized: vectorization and interleaving are "
1760b57cec5SDimitry Andric "explicitly disabled, or the loop has already been "
1770b57cec5SDimitry Andric "vectorized";
1780b57cec5SDimitry Andric });
1790b57cec5SDimitry Andric return false;
1800b57cec5SDimitry Andric }
1810b57cec5SDimitry Andric
1820b57cec5SDimitry Andric return true;
1830b57cec5SDimitry Andric }
1840b57cec5SDimitry Andric
emitRemarkWithHints() const1850b57cec5SDimitry Andric void LoopVectorizeHints::emitRemarkWithHints() const {
1860b57cec5SDimitry Andric using namespace ore;
1870b57cec5SDimitry Andric
1880b57cec5SDimitry Andric ORE.emit([&]() {
1890b57cec5SDimitry Andric if (Force.Value == LoopVectorizeHints::FK_Disabled)
1900b57cec5SDimitry Andric return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
1910b57cec5SDimitry Andric TheLoop->getStartLoc(),
1920b57cec5SDimitry Andric TheLoop->getHeader())
1930b57cec5SDimitry Andric << "loop not vectorized: vectorization is explicitly disabled";
1940b57cec5SDimitry Andric else {
1950b57cec5SDimitry Andric OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
1960b57cec5SDimitry Andric TheLoop->getStartLoc(), TheLoop->getHeader());
1970b57cec5SDimitry Andric R << "loop not vectorized";
1980b57cec5SDimitry Andric if (Force.Value == LoopVectorizeHints::FK_Enabled) {
1990b57cec5SDimitry Andric R << " (Force=" << NV("Force", true);
2000b57cec5SDimitry Andric if (Width.Value != 0)
201af732203SDimitry Andric R << ", Vector Width=" << NV("VectorWidth", getWidth());
202*5f7ddb14SDimitry Andric if (getInterleave() != 0)
203*5f7ddb14SDimitry Andric R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
2040b57cec5SDimitry Andric R << ")";
2050b57cec5SDimitry Andric }
2060b57cec5SDimitry Andric return R;
2070b57cec5SDimitry Andric }
2080b57cec5SDimitry Andric });
2090b57cec5SDimitry Andric }
2100b57cec5SDimitry Andric
vectorizeAnalysisPassName() const2110b57cec5SDimitry Andric const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
212af732203SDimitry Andric if (getWidth() == ElementCount::getFixed(1))
2130b57cec5SDimitry Andric return LV_NAME;
2140b57cec5SDimitry Andric if (getForce() == LoopVectorizeHints::FK_Disabled)
2150b57cec5SDimitry Andric return LV_NAME;
216af732203SDimitry Andric if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
2170b57cec5SDimitry Andric return LV_NAME;
2180b57cec5SDimitry Andric return OptimizationRemarkAnalysis::AlwaysPrint;
2190b57cec5SDimitry Andric }
2200b57cec5SDimitry Andric
allowReordering() const221*5f7ddb14SDimitry Andric bool LoopVectorizeHints::allowReordering() const {
222*5f7ddb14SDimitry Andric // Allow the vectorizer to change the order of operations if enabling
223*5f7ddb14SDimitry Andric // loop hints are provided
224*5f7ddb14SDimitry Andric ElementCount EC = getWidth();
225*5f7ddb14SDimitry Andric return HintsAllowReordering &&
226*5f7ddb14SDimitry Andric (getForce() == LoopVectorizeHints::FK_Enabled ||
227*5f7ddb14SDimitry Andric EC.getKnownMinValue() > 1);
228*5f7ddb14SDimitry Andric }
229*5f7ddb14SDimitry Andric
getHintsFromMetadata()2300b57cec5SDimitry Andric void LoopVectorizeHints::getHintsFromMetadata() {
2310b57cec5SDimitry Andric MDNode *LoopID = TheLoop->getLoopID();
2320b57cec5SDimitry Andric if (!LoopID)
2330b57cec5SDimitry Andric return;
2340b57cec5SDimitry Andric
2350b57cec5SDimitry Andric // First operand should refer to the loop id itself.
2360b57cec5SDimitry Andric assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
2370b57cec5SDimitry Andric assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
2380b57cec5SDimitry Andric
2390b57cec5SDimitry Andric for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
2400b57cec5SDimitry Andric const MDString *S = nullptr;
2410b57cec5SDimitry Andric SmallVector<Metadata *, 4> Args;
2420b57cec5SDimitry Andric
2430b57cec5SDimitry Andric // The expected hint is either a MDString or a MDNode with the first
2440b57cec5SDimitry Andric // operand a MDString.
2450b57cec5SDimitry Andric if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
2460b57cec5SDimitry Andric if (!MD || MD->getNumOperands() == 0)
2470b57cec5SDimitry Andric continue;
2480b57cec5SDimitry Andric S = dyn_cast<MDString>(MD->getOperand(0));
2490b57cec5SDimitry Andric for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
2500b57cec5SDimitry Andric Args.push_back(MD->getOperand(i));
2510b57cec5SDimitry Andric } else {
2520b57cec5SDimitry Andric S = dyn_cast<MDString>(LoopID->getOperand(i));
2530b57cec5SDimitry Andric assert(Args.size() == 0 && "too many arguments for MDString");
2540b57cec5SDimitry Andric }
2550b57cec5SDimitry Andric
2560b57cec5SDimitry Andric if (!S)
2570b57cec5SDimitry Andric continue;
2580b57cec5SDimitry Andric
2590b57cec5SDimitry Andric // Check if the hint starts with the loop metadata prefix.
2600b57cec5SDimitry Andric StringRef Name = S->getString();
2610b57cec5SDimitry Andric if (Args.size() == 1)
2620b57cec5SDimitry Andric setHint(Name, Args[0]);
2630b57cec5SDimitry Andric }
2640b57cec5SDimitry Andric }
2650b57cec5SDimitry Andric
setHint(StringRef Name,Metadata * Arg)2660b57cec5SDimitry Andric void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
2670b57cec5SDimitry Andric if (!Name.startswith(Prefix()))
2680b57cec5SDimitry Andric return;
2690b57cec5SDimitry Andric Name = Name.substr(Prefix().size(), StringRef::npos);
2700b57cec5SDimitry Andric
2710b57cec5SDimitry Andric const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
2720b57cec5SDimitry Andric if (!C)
2730b57cec5SDimitry Andric return;
2740b57cec5SDimitry Andric unsigned Val = C->getZExtValue();
2750b57cec5SDimitry Andric
276af732203SDimitry Andric Hint *Hints[] = {&Width, &Interleave, &Force,
277af732203SDimitry Andric &IsVectorized, &Predicate, &Scalable};
2780b57cec5SDimitry Andric for (auto H : Hints) {
2790b57cec5SDimitry Andric if (Name == H->Name) {
2800b57cec5SDimitry Andric if (H->validate(Val))
2810b57cec5SDimitry Andric H->Value = Val;
2820b57cec5SDimitry Andric else
2830b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
2840b57cec5SDimitry Andric break;
2850b57cec5SDimitry Andric }
2860b57cec5SDimitry Andric }
2870b57cec5SDimitry Andric }
2880b57cec5SDimitry Andric
2890b57cec5SDimitry Andric // Return true if the inner loop \p Lp is uniform with regard to the outer loop
2900b57cec5SDimitry Andric // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
2910b57cec5SDimitry Andric // executing the inner loop will execute the same iterations). This check is
2920b57cec5SDimitry Andric // very constrained for now but it will be relaxed in the future. \p Lp is
2930b57cec5SDimitry Andric // considered uniform if it meets all the following conditions:
2940b57cec5SDimitry Andric // 1) it has a canonical IV (starting from 0 and with stride 1),
2950b57cec5SDimitry Andric // 2) its latch terminator is a conditional branch and,
2960b57cec5SDimitry Andric // 3) its latch condition is a compare instruction whose operands are the
2970b57cec5SDimitry Andric // canonical IV and an OuterLp invariant.
2980b57cec5SDimitry Andric // This check doesn't take into account the uniformity of other conditions not
2990b57cec5SDimitry Andric // related to the loop latch because they don't affect the loop uniformity.
3000b57cec5SDimitry Andric //
3010b57cec5SDimitry Andric // NOTE: We decided to keep all these checks and its associated documentation
3020b57cec5SDimitry Andric // together so that we can easily have a picture of the current supported loop
3030b57cec5SDimitry Andric // nests. However, some of the current checks don't depend on \p OuterLp and
3040b57cec5SDimitry Andric // would be redundantly executed for each \p Lp if we invoked this function for
3050b57cec5SDimitry Andric // different candidate outer loops. This is not the case for now because we
3060b57cec5SDimitry Andric // don't currently have the infrastructure to evaluate multiple candidate outer
3070b57cec5SDimitry Andric // loops and \p OuterLp will be a fixed parameter while we only support explicit
3080b57cec5SDimitry Andric // outer loop vectorization. It's also very likely that these checks go away
3090b57cec5SDimitry Andric // before introducing the aforementioned infrastructure. However, if this is not
3100b57cec5SDimitry Andric // the case, we should move the \p OuterLp independent checks to a separate
3110b57cec5SDimitry Andric // function that is only executed once for each \p Lp.
isUniformLoop(Loop * Lp,Loop * OuterLp)3120b57cec5SDimitry Andric static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
3130b57cec5SDimitry Andric assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
3140b57cec5SDimitry Andric
3150b57cec5SDimitry Andric // If Lp is the outer loop, it's uniform by definition.
3160b57cec5SDimitry Andric if (Lp == OuterLp)
3170b57cec5SDimitry Andric return true;
3180b57cec5SDimitry Andric assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
3190b57cec5SDimitry Andric
3200b57cec5SDimitry Andric // 1.
3210b57cec5SDimitry Andric PHINode *IV = Lp->getCanonicalInductionVariable();
3220b57cec5SDimitry Andric if (!IV) {
3230b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
3240b57cec5SDimitry Andric return false;
3250b57cec5SDimitry Andric }
3260b57cec5SDimitry Andric
3270b57cec5SDimitry Andric // 2.
3280b57cec5SDimitry Andric BasicBlock *Latch = Lp->getLoopLatch();
3290b57cec5SDimitry Andric auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
3300b57cec5SDimitry Andric if (!LatchBr || LatchBr->isUnconditional()) {
3310b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
3320b57cec5SDimitry Andric return false;
3330b57cec5SDimitry Andric }
3340b57cec5SDimitry Andric
3350b57cec5SDimitry Andric // 3.
3360b57cec5SDimitry Andric auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
3370b57cec5SDimitry Andric if (!LatchCmp) {
3380b57cec5SDimitry Andric LLVM_DEBUG(
3390b57cec5SDimitry Andric dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
3400b57cec5SDimitry Andric return false;
3410b57cec5SDimitry Andric }
3420b57cec5SDimitry Andric
3430b57cec5SDimitry Andric Value *CondOp0 = LatchCmp->getOperand(0);
3440b57cec5SDimitry Andric Value *CondOp1 = LatchCmp->getOperand(1);
3450b57cec5SDimitry Andric Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
3460b57cec5SDimitry Andric if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
3470b57cec5SDimitry Andric !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
3480b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
3490b57cec5SDimitry Andric return false;
3500b57cec5SDimitry Andric }
3510b57cec5SDimitry Andric
3520b57cec5SDimitry Andric return true;
3530b57cec5SDimitry Andric }
3540b57cec5SDimitry Andric
3550b57cec5SDimitry Andric // Return true if \p Lp and all its nested loops are uniform with regard to \p
3560b57cec5SDimitry Andric // OuterLp.
isUniformLoopNest(Loop * Lp,Loop * OuterLp)3570b57cec5SDimitry Andric static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
3580b57cec5SDimitry Andric if (!isUniformLoop(Lp, OuterLp))
3590b57cec5SDimitry Andric return false;
3600b57cec5SDimitry Andric
3610b57cec5SDimitry Andric // Check if nested loops are uniform.
3620b57cec5SDimitry Andric for (Loop *SubLp : *Lp)
3630b57cec5SDimitry Andric if (!isUniformLoopNest(SubLp, OuterLp))
3640b57cec5SDimitry Andric return false;
3650b57cec5SDimitry Andric
3660b57cec5SDimitry Andric return true;
3670b57cec5SDimitry Andric }
3680b57cec5SDimitry Andric
3690b57cec5SDimitry Andric /// Check whether it is safe to if-convert this phi node.
3700b57cec5SDimitry Andric ///
3710b57cec5SDimitry Andric /// Phi nodes with constant expressions that can trap are not safe to if
3720b57cec5SDimitry Andric /// convert.
canIfConvertPHINodes(BasicBlock * BB)3730b57cec5SDimitry Andric static bool canIfConvertPHINodes(BasicBlock *BB) {
3740b57cec5SDimitry Andric for (PHINode &Phi : BB->phis()) {
3750b57cec5SDimitry Andric for (Value *V : Phi.incoming_values())
3760b57cec5SDimitry Andric if (auto *C = dyn_cast<Constant>(V))
3770b57cec5SDimitry Andric if (C->canTrap())
3780b57cec5SDimitry Andric return false;
3790b57cec5SDimitry Andric }
3800b57cec5SDimitry Andric return true;
3810b57cec5SDimitry Andric }
3820b57cec5SDimitry Andric
convertPointerToIntegerType(const DataLayout & DL,Type * Ty)3830b57cec5SDimitry Andric static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
3840b57cec5SDimitry Andric if (Ty->isPointerTy())
3850b57cec5SDimitry Andric return DL.getIntPtrType(Ty);
3860b57cec5SDimitry Andric
3870b57cec5SDimitry Andric // It is possible that char's or short's overflow when we ask for the loop's
3880b57cec5SDimitry Andric // trip count, work around this by changing the type size.
3890b57cec5SDimitry Andric if (Ty->getScalarSizeInBits() < 32)
3900b57cec5SDimitry Andric return Type::getInt32Ty(Ty->getContext());
3910b57cec5SDimitry Andric
3920b57cec5SDimitry Andric return Ty;
3930b57cec5SDimitry Andric }
3940b57cec5SDimitry Andric
getWiderType(const DataLayout & DL,Type * Ty0,Type * Ty1)3950b57cec5SDimitry Andric static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
3960b57cec5SDimitry Andric Ty0 = convertPointerToIntegerType(DL, Ty0);
3970b57cec5SDimitry Andric Ty1 = convertPointerToIntegerType(DL, Ty1);
3980b57cec5SDimitry Andric if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
3990b57cec5SDimitry Andric return Ty0;
4000b57cec5SDimitry Andric return Ty1;
4010b57cec5SDimitry Andric }
4020b57cec5SDimitry Andric
4030b57cec5SDimitry Andric /// Check that the instruction has outside loop users and is not an
4040b57cec5SDimitry Andric /// identified reduction variable.
hasOutsideLoopUser(const Loop * TheLoop,Instruction * Inst,SmallPtrSetImpl<Value * > & AllowedExit)4050b57cec5SDimitry Andric static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
4060b57cec5SDimitry Andric SmallPtrSetImpl<Value *> &AllowedExit) {
4070b57cec5SDimitry Andric // Reductions, Inductions and non-header phis are allowed to have exit users. All
4080b57cec5SDimitry Andric // other instructions must not have external users.
4090b57cec5SDimitry Andric if (!AllowedExit.count(Inst))
4100b57cec5SDimitry Andric // Check that all of the users of the loop are inside the BB.
4110b57cec5SDimitry Andric for (User *U : Inst->users()) {
4120b57cec5SDimitry Andric Instruction *UI = cast<Instruction>(U);
4130b57cec5SDimitry Andric // This user may be a reduction exit value.
4140b57cec5SDimitry Andric if (!TheLoop->contains(UI)) {
4150b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
4160b57cec5SDimitry Andric return true;
4170b57cec5SDimitry Andric }
4180b57cec5SDimitry Andric }
4190b57cec5SDimitry Andric return false;
4200b57cec5SDimitry Andric }
4210b57cec5SDimitry Andric
isConsecutivePtr(Value * Ptr) const422*5f7ddb14SDimitry Andric int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const {
4230b57cec5SDimitry Andric const ValueToValueMap &Strides =
4240b57cec5SDimitry Andric getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
4250b57cec5SDimitry Andric
426af732203SDimitry Andric Function *F = TheLoop->getHeader()->getParent();
427af732203SDimitry Andric bool OptForSize = F->hasOptSize() ||
428af732203SDimitry Andric llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
429af732203SDimitry Andric PGSOQueryType::IRPass);
430af732203SDimitry Andric bool CanAddPredicate = !OptForSize;
4318bcb0991SDimitry Andric int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
4320b57cec5SDimitry Andric if (Stride == 1 || Stride == -1)
4330b57cec5SDimitry Andric return Stride;
4340b57cec5SDimitry Andric return 0;
4350b57cec5SDimitry Andric }
4360b57cec5SDimitry Andric
isUniform(Value * V)4370b57cec5SDimitry Andric bool LoopVectorizationLegality::isUniform(Value *V) {
4380b57cec5SDimitry Andric return LAI->isUniform(V);
4390b57cec5SDimitry Andric }
4400b57cec5SDimitry Andric
canVectorizeOuterLoop()4410b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeOuterLoop() {
442af732203SDimitry Andric assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
4430b57cec5SDimitry Andric // Store the result and return it at the end instead of exiting early, in case
4440b57cec5SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
4450b57cec5SDimitry Andric bool Result = true;
4460b57cec5SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
4470b57cec5SDimitry Andric
4480b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
4490b57cec5SDimitry Andric // Check whether the BB terminator is a BranchInst. Any other terminator is
4500b57cec5SDimitry Andric // not supported yet.
4510b57cec5SDimitry Andric auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
4520b57cec5SDimitry Andric if (!Br) {
4530b57cec5SDimitry Andric reportVectorizationFailure("Unsupported basic block terminator",
4540b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
4558bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
4560b57cec5SDimitry Andric if (DoExtraAnalysis)
4570b57cec5SDimitry Andric Result = false;
4580b57cec5SDimitry Andric else
4590b57cec5SDimitry Andric return false;
4600b57cec5SDimitry Andric }
4610b57cec5SDimitry Andric
4620b57cec5SDimitry Andric // Check whether the BranchInst is a supported one. Only unconditional
4630b57cec5SDimitry Andric // branches, conditional branches with an outer loop invariant condition or
4640b57cec5SDimitry Andric // backedges are supported.
4650b57cec5SDimitry Andric // FIXME: We skip these checks when VPlan predication is enabled as we
4660b57cec5SDimitry Andric // want to allow divergent branches. This whole check will be removed
4670b57cec5SDimitry Andric // once VPlan predication is on by default.
4680b57cec5SDimitry Andric if (!EnableVPlanPredication && Br && Br->isConditional() &&
4690b57cec5SDimitry Andric !TheLoop->isLoopInvariant(Br->getCondition()) &&
4700b57cec5SDimitry Andric !LI->isLoopHeader(Br->getSuccessor(0)) &&
4710b57cec5SDimitry Andric !LI->isLoopHeader(Br->getSuccessor(1))) {
4720b57cec5SDimitry Andric reportVectorizationFailure("Unsupported conditional branch",
4730b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
4748bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
4750b57cec5SDimitry Andric if (DoExtraAnalysis)
4760b57cec5SDimitry Andric Result = false;
4770b57cec5SDimitry Andric else
4780b57cec5SDimitry Andric return false;
4790b57cec5SDimitry Andric }
4800b57cec5SDimitry Andric }
4810b57cec5SDimitry Andric
4820b57cec5SDimitry Andric // Check whether inner loops are uniform. At this point, we only support
4830b57cec5SDimitry Andric // simple outer loops scenarios with uniform nested loops.
4840b57cec5SDimitry Andric if (!isUniformLoopNest(TheLoop /*loop nest*/,
4850b57cec5SDimitry Andric TheLoop /*context outer loop*/)) {
4860b57cec5SDimitry Andric reportVectorizationFailure("Outer loop contains divergent loops",
4870b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
4888bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
4890b57cec5SDimitry Andric if (DoExtraAnalysis)
4900b57cec5SDimitry Andric Result = false;
4910b57cec5SDimitry Andric else
4920b57cec5SDimitry Andric return false;
4930b57cec5SDimitry Andric }
4940b57cec5SDimitry Andric
4950b57cec5SDimitry Andric // Check whether we are able to set up outer loop induction.
4960b57cec5SDimitry Andric if (!setupOuterLoopInductions()) {
4970b57cec5SDimitry Andric reportVectorizationFailure("Unsupported outer loop Phi(s)",
4980b57cec5SDimitry Andric "Unsupported outer loop Phi(s)",
4998bcb0991SDimitry Andric "UnsupportedPhi", ORE, TheLoop);
5000b57cec5SDimitry Andric if (DoExtraAnalysis)
5010b57cec5SDimitry Andric Result = false;
5020b57cec5SDimitry Andric else
5030b57cec5SDimitry Andric return false;
5040b57cec5SDimitry Andric }
5050b57cec5SDimitry Andric
5060b57cec5SDimitry Andric return Result;
5070b57cec5SDimitry Andric }
5080b57cec5SDimitry Andric
addInductionPhi(PHINode * Phi,const InductionDescriptor & ID,SmallPtrSetImpl<Value * > & AllowedExit)5090b57cec5SDimitry Andric void LoopVectorizationLegality::addInductionPhi(
5100b57cec5SDimitry Andric PHINode *Phi, const InductionDescriptor &ID,
5110b57cec5SDimitry Andric SmallPtrSetImpl<Value *> &AllowedExit) {
5120b57cec5SDimitry Andric Inductions[Phi] = ID;
5130b57cec5SDimitry Andric
5140b57cec5SDimitry Andric // In case this induction also comes with casts that we know we can ignore
5150b57cec5SDimitry Andric // in the vectorized loop body, record them here. All casts could be recorded
5160b57cec5SDimitry Andric // here for ignoring, but suffices to record only the first (as it is the
5170b57cec5SDimitry Andric // only one that may bw used outside the cast sequence).
5180b57cec5SDimitry Andric const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
5190b57cec5SDimitry Andric if (!Casts.empty())
5200b57cec5SDimitry Andric InductionCastsToIgnore.insert(*Casts.begin());
5210b57cec5SDimitry Andric
5220b57cec5SDimitry Andric Type *PhiTy = Phi->getType();
5230b57cec5SDimitry Andric const DataLayout &DL = Phi->getModule()->getDataLayout();
5240b57cec5SDimitry Andric
5250b57cec5SDimitry Andric // Get the widest type.
5260b57cec5SDimitry Andric if (!PhiTy->isFloatingPointTy()) {
5270b57cec5SDimitry Andric if (!WidestIndTy)
5280b57cec5SDimitry Andric WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
5290b57cec5SDimitry Andric else
5300b57cec5SDimitry Andric WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
5310b57cec5SDimitry Andric }
5320b57cec5SDimitry Andric
5330b57cec5SDimitry Andric // Int inductions are special because we only allow one IV.
5340b57cec5SDimitry Andric if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
5350b57cec5SDimitry Andric ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
5360b57cec5SDimitry Andric isa<Constant>(ID.getStartValue()) &&
5370b57cec5SDimitry Andric cast<Constant>(ID.getStartValue())->isNullValue()) {
5380b57cec5SDimitry Andric
5390b57cec5SDimitry Andric // Use the phi node with the widest type as induction. Use the last
5400b57cec5SDimitry Andric // one if there are multiple (no good reason for doing this other
5410b57cec5SDimitry Andric // than it is expedient). We've checked that it begins at zero and
5420b57cec5SDimitry Andric // steps by one, so this is a canonical induction variable.
5430b57cec5SDimitry Andric if (!PrimaryInduction || PhiTy == WidestIndTy)
5440b57cec5SDimitry Andric PrimaryInduction = Phi;
5450b57cec5SDimitry Andric }
5460b57cec5SDimitry Andric
5470b57cec5SDimitry Andric // Both the PHI node itself, and the "post-increment" value feeding
5480b57cec5SDimitry Andric // back into the PHI node may have external users.
5490b57cec5SDimitry Andric // We can allow those uses, except if the SCEVs we have for them rely
5500b57cec5SDimitry Andric // on predicates that only hold within the loop, since allowing the exit
5510b57cec5SDimitry Andric // currently means re-using this SCEV outside the loop (see PR33706 for more
5520b57cec5SDimitry Andric // details).
5530b57cec5SDimitry Andric if (PSE.getUnionPredicate().isAlwaysTrue()) {
5540b57cec5SDimitry Andric AllowedExit.insert(Phi);
5550b57cec5SDimitry Andric AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
5560b57cec5SDimitry Andric }
5570b57cec5SDimitry Andric
5580b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
5590b57cec5SDimitry Andric }
5600b57cec5SDimitry Andric
setupOuterLoopInductions()5610b57cec5SDimitry Andric bool LoopVectorizationLegality::setupOuterLoopInductions() {
5620b57cec5SDimitry Andric BasicBlock *Header = TheLoop->getHeader();
5630b57cec5SDimitry Andric
5640b57cec5SDimitry Andric // Returns true if a given Phi is a supported induction.
5650b57cec5SDimitry Andric auto isSupportedPhi = [&](PHINode &Phi) -> bool {
5660b57cec5SDimitry Andric InductionDescriptor ID;
5670b57cec5SDimitry Andric if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
5680b57cec5SDimitry Andric ID.getKind() == InductionDescriptor::IK_IntInduction) {
5690b57cec5SDimitry Andric addInductionPhi(&Phi, ID, AllowedExit);
5700b57cec5SDimitry Andric return true;
5710b57cec5SDimitry Andric } else {
5720b57cec5SDimitry Andric // Bail out for any Phi in the outer loop header that is not a supported
5730b57cec5SDimitry Andric // induction.
5740b57cec5SDimitry Andric LLVM_DEBUG(
5750b57cec5SDimitry Andric dbgs()
5760b57cec5SDimitry Andric << "LV: Found unsupported PHI for outer loop vectorization.\n");
5770b57cec5SDimitry Andric return false;
5780b57cec5SDimitry Andric }
5790b57cec5SDimitry Andric };
5800b57cec5SDimitry Andric
5810b57cec5SDimitry Andric if (llvm::all_of(Header->phis(), isSupportedPhi))
5820b57cec5SDimitry Andric return true;
5830b57cec5SDimitry Andric else
5840b57cec5SDimitry Andric return false;
5850b57cec5SDimitry Andric }
5860b57cec5SDimitry Andric
5875ffd83dbSDimitry Andric /// Checks if a function is scalarizable according to the TLI, in
5885ffd83dbSDimitry Andric /// the sense that it should be vectorized and then expanded in
5895ffd83dbSDimitry Andric /// multiple scalar calls. This is represented in the
5905ffd83dbSDimitry Andric /// TLI via mappings that do not specify a vector name, as in the
5915ffd83dbSDimitry Andric /// following example:
5925ffd83dbSDimitry Andric ///
5935ffd83dbSDimitry Andric /// const VecDesc VecIntrinsics[] = {
5945ffd83dbSDimitry Andric /// {"llvm.phx.abs.i32", "", 4}
5955ffd83dbSDimitry Andric /// };
isTLIScalarize(const TargetLibraryInfo & TLI,const CallInst & CI)5965ffd83dbSDimitry Andric static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
5975ffd83dbSDimitry Andric const StringRef ScalarName = CI.getCalledFunction()->getName();
5985ffd83dbSDimitry Andric bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
5995ffd83dbSDimitry Andric // Check that all known VFs are not associated to a vector
6005ffd83dbSDimitry Andric // function, i.e. the vector name is emty.
601*5f7ddb14SDimitry Andric if (Scalarize) {
602*5f7ddb14SDimitry Andric ElementCount WidestFixedVF, WidestScalableVF;
603*5f7ddb14SDimitry Andric TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
604*5f7ddb14SDimitry Andric for (ElementCount VF = ElementCount::getFixed(2);
605*5f7ddb14SDimitry Andric ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
6065ffd83dbSDimitry Andric Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
607*5f7ddb14SDimitry Andric for (ElementCount VF = ElementCount::getScalable(1);
608*5f7ddb14SDimitry Andric ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
609*5f7ddb14SDimitry Andric Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
610*5f7ddb14SDimitry Andric assert((WidestScalableVF.isZero() || !Scalarize) &&
611*5f7ddb14SDimitry Andric "Caller may decide to scalarize a variant using a scalable VF");
6125ffd83dbSDimitry Andric }
6135ffd83dbSDimitry Andric return Scalarize;
6145ffd83dbSDimitry Andric }
6155ffd83dbSDimitry Andric
canVectorizeInstrs()6160b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeInstrs() {
6170b57cec5SDimitry Andric BasicBlock *Header = TheLoop->getHeader();
6180b57cec5SDimitry Andric
6190b57cec5SDimitry Andric // For each block in the loop.
6200b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
6210b57cec5SDimitry Andric // Scan the instructions in the block and look for hazards.
6220b57cec5SDimitry Andric for (Instruction &I : *BB) {
6230b57cec5SDimitry Andric if (auto *Phi = dyn_cast<PHINode>(&I)) {
6240b57cec5SDimitry Andric Type *PhiTy = Phi->getType();
6250b57cec5SDimitry Andric // Check that this PHI type is allowed.
6260b57cec5SDimitry Andric if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
6270b57cec5SDimitry Andric !PhiTy->isPointerTy()) {
6280b57cec5SDimitry Andric reportVectorizationFailure("Found a non-int non-pointer PHI",
6290b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
6308bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
6310b57cec5SDimitry Andric return false;
6320b57cec5SDimitry Andric }
6330b57cec5SDimitry Andric
6340b57cec5SDimitry Andric // If this PHINode is not in the header block, then we know that we
6350b57cec5SDimitry Andric // can convert it to select during if-conversion. No need to check if
6360b57cec5SDimitry Andric // the PHIs in this block are induction or reduction variables.
6370b57cec5SDimitry Andric if (BB != Header) {
6380b57cec5SDimitry Andric // Non-header phi nodes that have outside uses can be vectorized. Add
6390b57cec5SDimitry Andric // them to the list of allowed exits.
6400b57cec5SDimitry Andric // Unsafe cyclic dependencies with header phis are identified during
6410b57cec5SDimitry Andric // legalization for reduction, induction and first order
6420b57cec5SDimitry Andric // recurrences.
6430b57cec5SDimitry Andric AllowedExit.insert(&I);
6440b57cec5SDimitry Andric continue;
6450b57cec5SDimitry Andric }
6460b57cec5SDimitry Andric
6470b57cec5SDimitry Andric // We only allow if-converted PHIs with exactly two incoming values.
6480b57cec5SDimitry Andric if (Phi->getNumIncomingValues() != 2) {
6490b57cec5SDimitry Andric reportVectorizationFailure("Found an invalid PHI",
6500b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
6518bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop, Phi);
6520b57cec5SDimitry Andric return false;
6530b57cec5SDimitry Andric }
6540b57cec5SDimitry Andric
6550b57cec5SDimitry Andric RecurrenceDescriptor RedDes;
6560b57cec5SDimitry Andric if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
6570b57cec5SDimitry Andric DT)) {
658*5f7ddb14SDimitry Andric Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
6590b57cec5SDimitry Andric AllowedExit.insert(RedDes.getLoopExitInstr());
6600b57cec5SDimitry Andric Reductions[Phi] = RedDes;
6610b57cec5SDimitry Andric continue;
6620b57cec5SDimitry Andric }
6630b57cec5SDimitry Andric
6640b57cec5SDimitry Andric // TODO: Instead of recording the AllowedExit, it would be good to record the
6650b57cec5SDimitry Andric // complementary set: NotAllowedExit. These include (but may not be
6660b57cec5SDimitry Andric // limited to):
6670b57cec5SDimitry Andric // 1. Reduction phis as they represent the one-before-last value, which
6680b57cec5SDimitry Andric // is not available when vectorized
6690b57cec5SDimitry Andric // 2. Induction phis and increment when SCEV predicates cannot be used
6700b57cec5SDimitry Andric // outside the loop - see addInductionPhi
6710b57cec5SDimitry Andric // 3. Non-Phis with outside uses when SCEV predicates cannot be used
6720b57cec5SDimitry Andric // outside the loop - see call to hasOutsideLoopUser in the non-phi
6730b57cec5SDimitry Andric // handling below
6740b57cec5SDimitry Andric // 4. FirstOrderRecurrence phis that can possibly be handled by
6750b57cec5SDimitry Andric // extraction.
6760b57cec5SDimitry Andric // By recording these, we can then reason about ways to vectorize each
6770b57cec5SDimitry Andric // of these NotAllowedExit.
6780b57cec5SDimitry Andric InductionDescriptor ID;
6790b57cec5SDimitry Andric if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
6800b57cec5SDimitry Andric addInductionPhi(Phi, ID, AllowedExit);
681*5f7ddb14SDimitry Andric Requirements->addExactFPMathInst(ID.getExactFPMathInst());
6820b57cec5SDimitry Andric continue;
6830b57cec5SDimitry Andric }
6840b57cec5SDimitry Andric
6850b57cec5SDimitry Andric if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
6860b57cec5SDimitry Andric SinkAfter, DT)) {
6875ffd83dbSDimitry Andric AllowedExit.insert(Phi);
6880b57cec5SDimitry Andric FirstOrderRecurrences.insert(Phi);
6890b57cec5SDimitry Andric continue;
6900b57cec5SDimitry Andric }
6910b57cec5SDimitry Andric
6920b57cec5SDimitry Andric // As a last resort, coerce the PHI to a AddRec expression
6930b57cec5SDimitry Andric // and re-try classifying it a an induction PHI.
6940b57cec5SDimitry Andric if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
6950b57cec5SDimitry Andric addInductionPhi(Phi, ID, AllowedExit);
6960b57cec5SDimitry Andric continue;
6970b57cec5SDimitry Andric }
6980b57cec5SDimitry Andric
6990b57cec5SDimitry Andric reportVectorizationFailure("Found an unidentified PHI",
7000b57cec5SDimitry Andric "value that could not be identified as "
7010b57cec5SDimitry Andric "reduction is used outside the loop",
7028bcb0991SDimitry Andric "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
7030b57cec5SDimitry Andric return false;
7040b57cec5SDimitry Andric } // end of PHI handling
7050b57cec5SDimitry Andric
7060b57cec5SDimitry Andric // We handle calls that:
7070b57cec5SDimitry Andric // * Are debug info intrinsics.
7080b57cec5SDimitry Andric // * Have a mapping to an IR intrinsic.
7090b57cec5SDimitry Andric // * Have a vector version available.
7100b57cec5SDimitry Andric auto *CI = dyn_cast<CallInst>(&I);
7115ffd83dbSDimitry Andric
7120b57cec5SDimitry Andric if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
7130b57cec5SDimitry Andric !isa<DbgInfoIntrinsic>(CI) &&
7140b57cec5SDimitry Andric !(CI->getCalledFunction() && TLI &&
7155ffd83dbSDimitry Andric (!VFDatabase::getMappings(*CI).empty() ||
7165ffd83dbSDimitry Andric isTLIScalarize(*TLI, *CI)))) {
7170b57cec5SDimitry Andric // If the call is a recognized math libary call, it is likely that
7180b57cec5SDimitry Andric // we can vectorize it given loosened floating-point constraints.
7190b57cec5SDimitry Andric LibFunc Func;
7200b57cec5SDimitry Andric bool IsMathLibCall =
7210b57cec5SDimitry Andric TLI && CI->getCalledFunction() &&
7220b57cec5SDimitry Andric CI->getType()->isFloatingPointTy() &&
7230b57cec5SDimitry Andric TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
7240b57cec5SDimitry Andric TLI->hasOptimizedCodeGen(Func);
7250b57cec5SDimitry Andric
7260b57cec5SDimitry Andric if (IsMathLibCall) {
7270b57cec5SDimitry Andric // TODO: Ideally, we should not use clang-specific language here,
7280b57cec5SDimitry Andric // but it's hard to provide meaningful yet generic advice.
7290b57cec5SDimitry Andric // Also, should this be guarded by allowExtraAnalysis() and/or be part
7300b57cec5SDimitry Andric // of the returned info from isFunctionVectorizable()?
7315ffd83dbSDimitry Andric reportVectorizationFailure(
7325ffd83dbSDimitry Andric "Found a non-intrinsic callsite",
7330b57cec5SDimitry Andric "library call cannot be vectorized. "
7340b57cec5SDimitry Andric "Try compiling with -fno-math-errno, -ffast-math, "
7350b57cec5SDimitry Andric "or similar flags",
7368bcb0991SDimitry Andric "CantVectorizeLibcall", ORE, TheLoop, CI);
7370b57cec5SDimitry Andric } else {
7380b57cec5SDimitry Andric reportVectorizationFailure("Found a non-intrinsic callsite",
7390b57cec5SDimitry Andric "call instruction cannot be vectorized",
7408bcb0991SDimitry Andric "CantVectorizeLibcall", ORE, TheLoop, CI);
7410b57cec5SDimitry Andric }
7420b57cec5SDimitry Andric return false;
7430b57cec5SDimitry Andric }
7440b57cec5SDimitry Andric
7450b57cec5SDimitry Andric // Some intrinsics have scalar arguments and should be same in order for
7460b57cec5SDimitry Andric // them to be vectorized (i.e. loop invariant).
7470b57cec5SDimitry Andric if (CI) {
7480b57cec5SDimitry Andric auto *SE = PSE.getSE();
7490b57cec5SDimitry Andric Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
7500b57cec5SDimitry Andric for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
7510b57cec5SDimitry Andric if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
7520b57cec5SDimitry Andric if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
7530b57cec5SDimitry Andric reportVectorizationFailure("Found unvectorizable intrinsic",
7540b57cec5SDimitry Andric "intrinsic instruction cannot be vectorized",
7558bcb0991SDimitry Andric "CantVectorizeIntrinsic", ORE, TheLoop, CI);
7560b57cec5SDimitry Andric return false;
7570b57cec5SDimitry Andric }
7580b57cec5SDimitry Andric }
7590b57cec5SDimitry Andric }
7600b57cec5SDimitry Andric
7610b57cec5SDimitry Andric // Check that the instruction return type is vectorizable.
7620b57cec5SDimitry Andric // Also, we can't vectorize extractelement instructions.
7630b57cec5SDimitry Andric if ((!VectorType::isValidElementType(I.getType()) &&
7640b57cec5SDimitry Andric !I.getType()->isVoidTy()) ||
7650b57cec5SDimitry Andric isa<ExtractElementInst>(I)) {
7660b57cec5SDimitry Andric reportVectorizationFailure("Found unvectorizable type",
7670b57cec5SDimitry Andric "instruction return type cannot be vectorized",
7688bcb0991SDimitry Andric "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
7690b57cec5SDimitry Andric return false;
7700b57cec5SDimitry Andric }
7710b57cec5SDimitry Andric
7720b57cec5SDimitry Andric // Check that the stored type is vectorizable.
7730b57cec5SDimitry Andric if (auto *ST = dyn_cast<StoreInst>(&I)) {
7740b57cec5SDimitry Andric Type *T = ST->getValueOperand()->getType();
7750b57cec5SDimitry Andric if (!VectorType::isValidElementType(T)) {
7760b57cec5SDimitry Andric reportVectorizationFailure("Store instruction cannot be vectorized",
7770b57cec5SDimitry Andric "store instruction cannot be vectorized",
7788bcb0991SDimitry Andric "CantVectorizeStore", ORE, TheLoop, ST);
7790b57cec5SDimitry Andric return false;
7800b57cec5SDimitry Andric }
7810b57cec5SDimitry Andric
7820b57cec5SDimitry Andric // For nontemporal stores, check that a nontemporal vector version is
7830b57cec5SDimitry Andric // supported on the target.
7840b57cec5SDimitry Andric if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
7850b57cec5SDimitry Andric // Arbitrarily try a vector of 2 elements.
786af732203SDimitry Andric auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
7870b57cec5SDimitry Andric assert(VecTy && "did not find vectorized version of stored type");
7885ffd83dbSDimitry Andric if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
7890b57cec5SDimitry Andric reportVectorizationFailure(
7900b57cec5SDimitry Andric "nontemporal store instruction cannot be vectorized",
7910b57cec5SDimitry Andric "nontemporal store instruction cannot be vectorized",
7928bcb0991SDimitry Andric "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
7930b57cec5SDimitry Andric return false;
7940b57cec5SDimitry Andric }
7950b57cec5SDimitry Andric }
7960b57cec5SDimitry Andric
7970b57cec5SDimitry Andric } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
7980b57cec5SDimitry Andric if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
7990b57cec5SDimitry Andric // For nontemporal loads, check that a nontemporal vector version is
8000b57cec5SDimitry Andric // supported on the target (arbitrarily try a vector of 2 elements).
801af732203SDimitry Andric auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
8020b57cec5SDimitry Andric assert(VecTy && "did not find vectorized version of load type");
8035ffd83dbSDimitry Andric if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
8040b57cec5SDimitry Andric reportVectorizationFailure(
8050b57cec5SDimitry Andric "nontemporal load instruction cannot be vectorized",
8060b57cec5SDimitry Andric "nontemporal load instruction cannot be vectorized",
8078bcb0991SDimitry Andric "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
8080b57cec5SDimitry Andric return false;
8090b57cec5SDimitry Andric }
8100b57cec5SDimitry Andric }
8110b57cec5SDimitry Andric
8120b57cec5SDimitry Andric // FP instructions can allow unsafe algebra, thus vectorizable by
8130b57cec5SDimitry Andric // non-IEEE-754 compliant SIMD units.
8140b57cec5SDimitry Andric // This applies to floating-point math operations and calls, not memory
8150b57cec5SDimitry Andric // operations, shuffles, or casts, as they don't change precision or
8160b57cec5SDimitry Andric // semantics.
8170b57cec5SDimitry Andric } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
8180b57cec5SDimitry Andric !I.isFast()) {
8190b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
8200b57cec5SDimitry Andric Hints->setPotentiallyUnsafe();
8210b57cec5SDimitry Andric }
8220b57cec5SDimitry Andric
8230b57cec5SDimitry Andric // Reduction instructions are allowed to have exit users.
8240b57cec5SDimitry Andric // All other instructions must not have external users.
8250b57cec5SDimitry Andric if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
8260b57cec5SDimitry Andric // We can safely vectorize loops where instructions within the loop are
8270b57cec5SDimitry Andric // used outside the loop only if the SCEV predicates within the loop is
8280b57cec5SDimitry Andric // same as outside the loop. Allowing the exit means reusing the SCEV
8290b57cec5SDimitry Andric // outside the loop.
8300b57cec5SDimitry Andric if (PSE.getUnionPredicate().isAlwaysTrue()) {
8310b57cec5SDimitry Andric AllowedExit.insert(&I);
8320b57cec5SDimitry Andric continue;
8330b57cec5SDimitry Andric }
8340b57cec5SDimitry Andric reportVectorizationFailure("Value cannot be used outside the loop",
8350b57cec5SDimitry Andric "value cannot be used outside the loop",
8368bcb0991SDimitry Andric "ValueUsedOutsideLoop", ORE, TheLoop, &I);
8370b57cec5SDimitry Andric return false;
8380b57cec5SDimitry Andric }
8390b57cec5SDimitry Andric } // next instr.
8400b57cec5SDimitry Andric }
8410b57cec5SDimitry Andric
8420b57cec5SDimitry Andric if (!PrimaryInduction) {
8430b57cec5SDimitry Andric if (Inductions.empty()) {
8440b57cec5SDimitry Andric reportVectorizationFailure("Did not find one integer induction var",
8450b57cec5SDimitry Andric "loop induction variable could not be identified",
8468bcb0991SDimitry Andric "NoInductionVariable", ORE, TheLoop);
8470b57cec5SDimitry Andric return false;
8480b57cec5SDimitry Andric } else if (!WidestIndTy) {
8490b57cec5SDimitry Andric reportVectorizationFailure("Did not find one integer induction var",
8500b57cec5SDimitry Andric "integer loop induction variable could not be identified",
8518bcb0991SDimitry Andric "NoIntegerInductionVariable", ORE, TheLoop);
8520b57cec5SDimitry Andric return false;
8530b57cec5SDimitry Andric } else {
8540b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
8550b57cec5SDimitry Andric }
8560b57cec5SDimitry Andric }
8570b57cec5SDimitry Andric
858480093f4SDimitry Andric // For first order recurrences, we use the previous value (incoming value from
859480093f4SDimitry Andric // the latch) to check if it dominates all users of the recurrence. Bail out
860480093f4SDimitry Andric // if we have to sink such an instruction for another recurrence, as the
861480093f4SDimitry Andric // dominance requirement may not hold after sinking.
862480093f4SDimitry Andric BasicBlock *LoopLatch = TheLoop->getLoopLatch();
863480093f4SDimitry Andric if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
864480093f4SDimitry Andric Instruction *V =
865480093f4SDimitry Andric cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
866480093f4SDimitry Andric return SinkAfter.find(V) != SinkAfter.end();
867480093f4SDimitry Andric }))
868480093f4SDimitry Andric return false;
869480093f4SDimitry Andric
8700b57cec5SDimitry Andric // Now we know the widest induction type, check if our found induction
8710b57cec5SDimitry Andric // is the same size. If it's not, unset it here and InnerLoopVectorizer
8720b57cec5SDimitry Andric // will create another.
8730b57cec5SDimitry Andric if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
8740b57cec5SDimitry Andric PrimaryInduction = nullptr;
8750b57cec5SDimitry Andric
8760b57cec5SDimitry Andric return true;
8770b57cec5SDimitry Andric }
8780b57cec5SDimitry Andric
canVectorizeMemory()8790b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeMemory() {
8800b57cec5SDimitry Andric LAI = &(*GetLAA)(*TheLoop);
8810b57cec5SDimitry Andric const OptimizationRemarkAnalysis *LAR = LAI->getReport();
8820b57cec5SDimitry Andric if (LAR) {
8830b57cec5SDimitry Andric ORE->emit([&]() {
8840b57cec5SDimitry Andric return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
8850b57cec5SDimitry Andric "loop not vectorized: ", *LAR);
8860b57cec5SDimitry Andric });
8870b57cec5SDimitry Andric }
888*5f7ddb14SDimitry Andric
8890b57cec5SDimitry Andric if (!LAI->canVectorizeMemory())
8900b57cec5SDimitry Andric return false;
8910b57cec5SDimitry Andric
8920b57cec5SDimitry Andric if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
8930b57cec5SDimitry Andric reportVectorizationFailure("Stores to a uniform address",
8940b57cec5SDimitry Andric "write to a loop invariant address could not be vectorized",
8958bcb0991SDimitry Andric "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
8960b57cec5SDimitry Andric return false;
8970b57cec5SDimitry Andric }
898*5f7ddb14SDimitry Andric
8990b57cec5SDimitry Andric Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
9000b57cec5SDimitry Andric PSE.addPredicate(LAI->getPSE().getUnionPredicate());
9010b57cec5SDimitry Andric return true;
9020b57cec5SDimitry Andric }
9030b57cec5SDimitry Andric
canVectorizeFPMath(bool EnableStrictReductions)904*5f7ddb14SDimitry Andric bool LoopVectorizationLegality::canVectorizeFPMath(
905*5f7ddb14SDimitry Andric bool EnableStrictReductions) {
906*5f7ddb14SDimitry Andric
907*5f7ddb14SDimitry Andric // First check if there is any ExactFP math or if we allow reassociations
908*5f7ddb14SDimitry Andric if (!Requirements->getExactFPInst() || Hints->allowReordering())
909*5f7ddb14SDimitry Andric return true;
910*5f7ddb14SDimitry Andric
911*5f7ddb14SDimitry Andric // If the above is false, we have ExactFPMath & do not allow reordering.
912*5f7ddb14SDimitry Andric // If the EnableStrictReductions flag is set, first check if we have any
913*5f7ddb14SDimitry Andric // Exact FP induction vars, which we cannot vectorize.
914*5f7ddb14SDimitry Andric if (!EnableStrictReductions ||
915*5f7ddb14SDimitry Andric any_of(getInductionVars(), [&](auto &Induction) -> bool {
916*5f7ddb14SDimitry Andric InductionDescriptor IndDesc = Induction.second;
917*5f7ddb14SDimitry Andric return IndDesc.getExactFPMathInst();
918*5f7ddb14SDimitry Andric }))
919*5f7ddb14SDimitry Andric return false;
920*5f7ddb14SDimitry Andric
921*5f7ddb14SDimitry Andric // We can now only vectorize if all reductions with Exact FP math also
922*5f7ddb14SDimitry Andric // have the isOrdered flag set, which indicates that we can move the
923*5f7ddb14SDimitry Andric // reduction operations in-loop.
924*5f7ddb14SDimitry Andric return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
925*5f7ddb14SDimitry Andric const RecurrenceDescriptor &RdxDesc = Reduction.second;
926*5f7ddb14SDimitry Andric return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
927*5f7ddb14SDimitry Andric }));
928*5f7ddb14SDimitry Andric }
929*5f7ddb14SDimitry Andric
isInductionPhi(const Value * V)9300b57cec5SDimitry Andric bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
9310b57cec5SDimitry Andric Value *In0 = const_cast<Value *>(V);
9320b57cec5SDimitry Andric PHINode *PN = dyn_cast_or_null<PHINode>(In0);
9330b57cec5SDimitry Andric if (!PN)
9340b57cec5SDimitry Andric return false;
9350b57cec5SDimitry Andric
9360b57cec5SDimitry Andric return Inductions.count(PN);
9370b57cec5SDimitry Andric }
9380b57cec5SDimitry Andric
isCastedInductionVariable(const Value * V)9390b57cec5SDimitry Andric bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
9400b57cec5SDimitry Andric auto *Inst = dyn_cast<Instruction>(V);
9410b57cec5SDimitry Andric return (Inst && InductionCastsToIgnore.count(Inst));
9420b57cec5SDimitry Andric }
9430b57cec5SDimitry Andric
isInductionVariable(const Value * V)9440b57cec5SDimitry Andric bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
9450b57cec5SDimitry Andric return isInductionPhi(V) || isCastedInductionVariable(V);
9460b57cec5SDimitry Andric }
9470b57cec5SDimitry Andric
isFirstOrderRecurrence(const PHINode * Phi)9480b57cec5SDimitry Andric bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
9490b57cec5SDimitry Andric return FirstOrderRecurrences.count(Phi);
9500b57cec5SDimitry Andric }
9510b57cec5SDimitry Andric
blockNeedsPredication(BasicBlock * BB) const952*5f7ddb14SDimitry Andric bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
9530b57cec5SDimitry Andric return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
9540b57cec5SDimitry Andric }
9550b57cec5SDimitry Andric
blockCanBePredicated(BasicBlock * BB,SmallPtrSetImpl<Value * > & SafePtrs,SmallPtrSetImpl<const Instruction * > & MaskedOp,SmallPtrSetImpl<Instruction * > & ConditionalAssumes) const9560b57cec5SDimitry Andric bool LoopVectorizationLegality::blockCanBePredicated(
957af732203SDimitry Andric BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
958af732203SDimitry Andric SmallPtrSetImpl<const Instruction *> &MaskedOp,
959af732203SDimitry Andric SmallPtrSetImpl<Instruction *> &ConditionalAssumes) const {
9600b57cec5SDimitry Andric for (Instruction &I : *BB) {
9610b57cec5SDimitry Andric // Check that we don't have a constant expression that can trap as operand.
9620b57cec5SDimitry Andric for (Value *Operand : I.operands()) {
9630b57cec5SDimitry Andric if (auto *C = dyn_cast<Constant>(Operand))
9640b57cec5SDimitry Andric if (C->canTrap())
9650b57cec5SDimitry Andric return false;
9660b57cec5SDimitry Andric }
9675ffd83dbSDimitry Andric
9685ffd83dbSDimitry Andric // We can predicate blocks with calls to assume, as long as we drop them in
9695ffd83dbSDimitry Andric // case we flatten the CFG via predication.
9705ffd83dbSDimitry Andric if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
9715ffd83dbSDimitry Andric ConditionalAssumes.insert(&I);
9725ffd83dbSDimitry Andric continue;
9735ffd83dbSDimitry Andric }
9745ffd83dbSDimitry Andric
975af732203SDimitry Andric // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
976af732203SDimitry Andric // TODO: there might be cases that it should block the vectorization. Let's
977af732203SDimitry Andric // ignore those for now.
978af732203SDimitry Andric if (isa<NoAliasScopeDeclInst>(&I))
979af732203SDimitry Andric continue;
980af732203SDimitry Andric
9810b57cec5SDimitry Andric // We might be able to hoist the load.
9820b57cec5SDimitry Andric if (I.mayReadFromMemory()) {
9830b57cec5SDimitry Andric auto *LI = dyn_cast<LoadInst>(&I);
9840b57cec5SDimitry Andric if (!LI)
9850b57cec5SDimitry Andric return false;
9860b57cec5SDimitry Andric if (!SafePtrs.count(LI->getPointerOperand())) {
9870b57cec5SDimitry Andric MaskedOp.insert(LI);
9880b57cec5SDimitry Andric continue;
9890b57cec5SDimitry Andric }
9900b57cec5SDimitry Andric }
9910b57cec5SDimitry Andric
9920b57cec5SDimitry Andric if (I.mayWriteToMemory()) {
9930b57cec5SDimitry Andric auto *SI = dyn_cast<StoreInst>(&I);
9940b57cec5SDimitry Andric if (!SI)
9950b57cec5SDimitry Andric return false;
9960b57cec5SDimitry Andric // Predicated store requires some form of masking:
9970b57cec5SDimitry Andric // 1) masked store HW instruction,
9980b57cec5SDimitry Andric // 2) emulation via load-blend-store (only if safe and legal to do so,
9990b57cec5SDimitry Andric // be aware on the race conditions), or
10000b57cec5SDimitry Andric // 3) element-by-element predicate check and scalar store.
10010b57cec5SDimitry Andric MaskedOp.insert(SI);
10020b57cec5SDimitry Andric continue;
10030b57cec5SDimitry Andric }
10040b57cec5SDimitry Andric if (I.mayThrow())
10050b57cec5SDimitry Andric return false;
10060b57cec5SDimitry Andric }
10070b57cec5SDimitry Andric
10080b57cec5SDimitry Andric return true;
10090b57cec5SDimitry Andric }
10100b57cec5SDimitry Andric
canVectorizeWithIfConvert()10110b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
10120b57cec5SDimitry Andric if (!EnableIfConversion) {
10130b57cec5SDimitry Andric reportVectorizationFailure("If-conversion is disabled",
10140b57cec5SDimitry Andric "if-conversion is disabled",
10158bcb0991SDimitry Andric "IfConversionDisabled",
10168bcb0991SDimitry Andric ORE, TheLoop);
10170b57cec5SDimitry Andric return false;
10180b57cec5SDimitry Andric }
10190b57cec5SDimitry Andric
10200b57cec5SDimitry Andric assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
10210b57cec5SDimitry Andric
10228bcb0991SDimitry Andric // A list of pointers which are known to be dereferenceable within scope of
10238bcb0991SDimitry Andric // the loop body for each iteration of the loop which executes. That is,
10248bcb0991SDimitry Andric // the memory pointed to can be dereferenced (with the access size implied by
10258bcb0991SDimitry Andric // the value's type) unconditionally within the loop header without
10268bcb0991SDimitry Andric // introducing a new fault.
10275ffd83dbSDimitry Andric SmallPtrSet<Value *, 8> SafePointers;
10280b57cec5SDimitry Andric
10290b57cec5SDimitry Andric // Collect safe addresses.
10300b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
10318bcb0991SDimitry Andric if (!blockNeedsPredication(BB)) {
10320b57cec5SDimitry Andric for (Instruction &I : *BB)
10330b57cec5SDimitry Andric if (auto *Ptr = getLoadStorePointerOperand(&I))
10345ffd83dbSDimitry Andric SafePointers.insert(Ptr);
10358bcb0991SDimitry Andric continue;
10368bcb0991SDimitry Andric }
10378bcb0991SDimitry Andric
10388bcb0991SDimitry Andric // For a block which requires predication, a address may be safe to access
10398bcb0991SDimitry Andric // in the loop w/o predication if we can prove dereferenceability facts
10408bcb0991SDimitry Andric // sufficient to ensure it'll never fault within the loop. For the moment,
10418bcb0991SDimitry Andric // we restrict this to loads; stores are more complicated due to
10428bcb0991SDimitry Andric // concurrency restrictions.
10438bcb0991SDimitry Andric ScalarEvolution &SE = *PSE.getSE();
10448bcb0991SDimitry Andric for (Instruction &I : *BB) {
10458bcb0991SDimitry Andric LoadInst *LI = dyn_cast<LoadInst>(&I);
1046af732203SDimitry Andric if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
10478bcb0991SDimitry Andric isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
10485ffd83dbSDimitry Andric SafePointers.insert(LI->getPointerOperand());
10498bcb0991SDimitry Andric }
10500b57cec5SDimitry Andric }
10510b57cec5SDimitry Andric
10520b57cec5SDimitry Andric // Collect the blocks that need predication.
10530b57cec5SDimitry Andric BasicBlock *Header = TheLoop->getHeader();
10540b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
10550b57cec5SDimitry Andric // We don't support switch statements inside loops.
10560b57cec5SDimitry Andric if (!isa<BranchInst>(BB->getTerminator())) {
10570b57cec5SDimitry Andric reportVectorizationFailure("Loop contains a switch statement",
10580b57cec5SDimitry Andric "loop contains a switch statement",
10598bcb0991SDimitry Andric "LoopContainsSwitch", ORE, TheLoop,
10608bcb0991SDimitry Andric BB->getTerminator());
10610b57cec5SDimitry Andric return false;
10620b57cec5SDimitry Andric }
10630b57cec5SDimitry Andric
10640b57cec5SDimitry Andric // We must be able to predicate all blocks that need to be predicated.
10650b57cec5SDimitry Andric if (blockNeedsPredication(BB)) {
1066af732203SDimitry Andric if (!blockCanBePredicated(BB, SafePointers, MaskedOp,
1067af732203SDimitry Andric ConditionalAssumes)) {
10680b57cec5SDimitry Andric reportVectorizationFailure(
10690b57cec5SDimitry Andric "Control flow cannot be substituted for a select",
10700b57cec5SDimitry Andric "control flow cannot be substituted for a select",
10718bcb0991SDimitry Andric "NoCFGForSelect", ORE, TheLoop,
10728bcb0991SDimitry Andric BB->getTerminator());
10730b57cec5SDimitry Andric return false;
10740b57cec5SDimitry Andric }
10750b57cec5SDimitry Andric } else if (BB != Header && !canIfConvertPHINodes(BB)) {
10760b57cec5SDimitry Andric reportVectorizationFailure(
10770b57cec5SDimitry Andric "Control flow cannot be substituted for a select",
10780b57cec5SDimitry Andric "control flow cannot be substituted for a select",
10798bcb0991SDimitry Andric "NoCFGForSelect", ORE, TheLoop,
10808bcb0991SDimitry Andric BB->getTerminator());
10810b57cec5SDimitry Andric return false;
10820b57cec5SDimitry Andric }
10830b57cec5SDimitry Andric }
10840b57cec5SDimitry Andric
10850b57cec5SDimitry Andric // We can if-convert this loop.
10860b57cec5SDimitry Andric return true;
10870b57cec5SDimitry Andric }
10880b57cec5SDimitry Andric
10890b57cec5SDimitry Andric // Helper function to canVectorizeLoopNestCFG.
canVectorizeLoopCFG(Loop * Lp,bool UseVPlanNativePath)10900b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
10910b57cec5SDimitry Andric bool UseVPlanNativePath) {
1092af732203SDimitry Andric assert((UseVPlanNativePath || Lp->isInnermost()) &&
10930b57cec5SDimitry Andric "VPlan-native path is not enabled.");
10940b57cec5SDimitry Andric
10950b57cec5SDimitry Andric // TODO: ORE should be improved to show more accurate information when an
10960b57cec5SDimitry Andric // outer loop can't be vectorized because a nested loop is not understood or
10970b57cec5SDimitry Andric // legal. Something like: "outer_loop_location: loop not vectorized:
10980b57cec5SDimitry Andric // (inner_loop_location) loop control flow is not understood by vectorizer".
10990b57cec5SDimitry Andric
11000b57cec5SDimitry Andric // Store the result and return it at the end instead of exiting early, in case
11010b57cec5SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
11020b57cec5SDimitry Andric bool Result = true;
11030b57cec5SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
11040b57cec5SDimitry Andric
11050b57cec5SDimitry Andric // We must have a loop in canonical form. Loops with indirectbr in them cannot
11060b57cec5SDimitry Andric // be canonicalized.
11070b57cec5SDimitry Andric if (!Lp->getLoopPreheader()) {
11080b57cec5SDimitry Andric reportVectorizationFailure("Loop doesn't have a legal pre-header",
11090b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
11108bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
11110b57cec5SDimitry Andric if (DoExtraAnalysis)
11120b57cec5SDimitry Andric Result = false;
11130b57cec5SDimitry Andric else
11140b57cec5SDimitry Andric return false;
11150b57cec5SDimitry Andric }
11160b57cec5SDimitry Andric
11170b57cec5SDimitry Andric // We must have a single backedge.
11180b57cec5SDimitry Andric if (Lp->getNumBackEdges() != 1) {
11190b57cec5SDimitry Andric reportVectorizationFailure("The loop must have a single backedge",
11200b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
11218bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
11220b57cec5SDimitry Andric if (DoExtraAnalysis)
11230b57cec5SDimitry Andric Result = false;
11240b57cec5SDimitry Andric else
11250b57cec5SDimitry Andric return false;
11260b57cec5SDimitry Andric }
11270b57cec5SDimitry Andric
11280b57cec5SDimitry Andric return Result;
11290b57cec5SDimitry Andric }
11300b57cec5SDimitry Andric
canVectorizeLoopNestCFG(Loop * Lp,bool UseVPlanNativePath)11310b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
11320b57cec5SDimitry Andric Loop *Lp, bool UseVPlanNativePath) {
11330b57cec5SDimitry Andric // Store the result and return it at the end instead of exiting early, in case
11340b57cec5SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
11350b57cec5SDimitry Andric bool Result = true;
11360b57cec5SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
11370b57cec5SDimitry Andric if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
11380b57cec5SDimitry Andric if (DoExtraAnalysis)
11390b57cec5SDimitry Andric Result = false;
11400b57cec5SDimitry Andric else
11410b57cec5SDimitry Andric return false;
11420b57cec5SDimitry Andric }
11430b57cec5SDimitry Andric
11440b57cec5SDimitry Andric // Recursively check whether the loop control flow of nested loops is
11450b57cec5SDimitry Andric // understood.
11460b57cec5SDimitry Andric for (Loop *SubLp : *Lp)
11470b57cec5SDimitry Andric if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
11480b57cec5SDimitry Andric if (DoExtraAnalysis)
11490b57cec5SDimitry Andric Result = false;
11500b57cec5SDimitry Andric else
11510b57cec5SDimitry Andric return false;
11520b57cec5SDimitry Andric }
11530b57cec5SDimitry Andric
11540b57cec5SDimitry Andric return Result;
11550b57cec5SDimitry Andric }
11560b57cec5SDimitry Andric
canVectorize(bool UseVPlanNativePath)11570b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
11580b57cec5SDimitry Andric // Store the result and return it at the end instead of exiting early, in case
11590b57cec5SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
11600b57cec5SDimitry Andric bool Result = true;
11610b57cec5SDimitry Andric
11620b57cec5SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
11630b57cec5SDimitry Andric // Check whether the loop-related control flow in the loop nest is expected by
11640b57cec5SDimitry Andric // vectorizer.
11650b57cec5SDimitry Andric if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
11660b57cec5SDimitry Andric if (DoExtraAnalysis)
11670b57cec5SDimitry Andric Result = false;
11680b57cec5SDimitry Andric else
11690b57cec5SDimitry Andric return false;
11700b57cec5SDimitry Andric }
11710b57cec5SDimitry Andric
11720b57cec5SDimitry Andric // We need to have a loop header.
11730b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
11740b57cec5SDimitry Andric << '\n');
11750b57cec5SDimitry Andric
11760b57cec5SDimitry Andric // Specific checks for outer loops. We skip the remaining legal checks at this
11770b57cec5SDimitry Andric // point because they don't support outer loops.
1178af732203SDimitry Andric if (!TheLoop->isInnermost()) {
11790b57cec5SDimitry Andric assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
11800b57cec5SDimitry Andric
11810b57cec5SDimitry Andric if (!canVectorizeOuterLoop()) {
11820b57cec5SDimitry Andric reportVectorizationFailure("Unsupported outer loop",
11830b57cec5SDimitry Andric "unsupported outer loop",
11848bcb0991SDimitry Andric "UnsupportedOuterLoop",
11858bcb0991SDimitry Andric ORE, TheLoop);
11860b57cec5SDimitry Andric // TODO: Implement DoExtraAnalysis when subsequent legal checks support
11870b57cec5SDimitry Andric // outer loops.
11880b57cec5SDimitry Andric return false;
11890b57cec5SDimitry Andric }
11900b57cec5SDimitry Andric
11910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
11920b57cec5SDimitry Andric return Result;
11930b57cec5SDimitry Andric }
11940b57cec5SDimitry Andric
1195af732203SDimitry Andric assert(TheLoop->isInnermost() && "Inner loop expected.");
11960b57cec5SDimitry Andric // Check if we can if-convert non-single-bb loops.
11970b57cec5SDimitry Andric unsigned NumBlocks = TheLoop->getNumBlocks();
11980b57cec5SDimitry Andric if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
11990b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
12000b57cec5SDimitry Andric if (DoExtraAnalysis)
12010b57cec5SDimitry Andric Result = false;
12020b57cec5SDimitry Andric else
12030b57cec5SDimitry Andric return false;
12040b57cec5SDimitry Andric }
12050b57cec5SDimitry Andric
12060b57cec5SDimitry Andric // Check if we can vectorize the instructions and CFG in this loop.
12070b57cec5SDimitry Andric if (!canVectorizeInstrs()) {
12080b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
12090b57cec5SDimitry Andric if (DoExtraAnalysis)
12100b57cec5SDimitry Andric Result = false;
12110b57cec5SDimitry Andric else
12120b57cec5SDimitry Andric return false;
12130b57cec5SDimitry Andric }
12140b57cec5SDimitry Andric
12150b57cec5SDimitry Andric // Go over each instruction and look at memory deps.
12160b57cec5SDimitry Andric if (!canVectorizeMemory()) {
12170b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
12180b57cec5SDimitry Andric if (DoExtraAnalysis)
12190b57cec5SDimitry Andric Result = false;
12200b57cec5SDimitry Andric else
12210b57cec5SDimitry Andric return false;
12220b57cec5SDimitry Andric }
12230b57cec5SDimitry Andric
12240b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
12250b57cec5SDimitry Andric << (LAI->getRuntimePointerChecking()->Need
12260b57cec5SDimitry Andric ? " (with a runtime bound check)"
12270b57cec5SDimitry Andric : "")
12280b57cec5SDimitry Andric << "!\n");
12290b57cec5SDimitry Andric
12300b57cec5SDimitry Andric unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
12310b57cec5SDimitry Andric if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
12320b57cec5SDimitry Andric SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
12330b57cec5SDimitry Andric
12340b57cec5SDimitry Andric if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
12350b57cec5SDimitry Andric reportVectorizationFailure("Too many SCEV checks needed",
12360b57cec5SDimitry Andric "Too many SCEV assumptions need to be made and checked at runtime",
12378bcb0991SDimitry Andric "TooManySCEVRunTimeChecks", ORE, TheLoop);
12380b57cec5SDimitry Andric if (DoExtraAnalysis)
12390b57cec5SDimitry Andric Result = false;
12400b57cec5SDimitry Andric else
12410b57cec5SDimitry Andric return false;
12420b57cec5SDimitry Andric }
12430b57cec5SDimitry Andric
12440b57cec5SDimitry Andric // Okay! We've done all the tests. If any have failed, return false. Otherwise
12450b57cec5SDimitry Andric // we can vectorize, and at this point we don't have any other mem analysis
12460b57cec5SDimitry Andric // which may limit our maximum vectorization factor, so just return true with
12470b57cec5SDimitry Andric // no restrictions.
12480b57cec5SDimitry Andric return Result;
12490b57cec5SDimitry Andric }
12500b57cec5SDimitry Andric
prepareToFoldTailByMasking()12518bcb0991SDimitry Andric bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
12520b57cec5SDimitry Andric
12530b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
12540b57cec5SDimitry Andric
12558bcb0991SDimitry Andric SmallPtrSet<const Value *, 8> ReductionLiveOuts;
12560b57cec5SDimitry Andric
12575ffd83dbSDimitry Andric for (auto &Reduction : getReductionVars())
12588bcb0991SDimitry Andric ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
12598bcb0991SDimitry Andric
12608bcb0991SDimitry Andric // TODO: handle non-reduction outside users when tail is folded by masking.
12610b57cec5SDimitry Andric for (auto *AE : AllowedExit) {
12628bcb0991SDimitry Andric // Check that all users of allowed exit values are inside the loop or
12638bcb0991SDimitry Andric // are the live-out of a reduction.
12648bcb0991SDimitry Andric if (ReductionLiveOuts.count(AE))
12658bcb0991SDimitry Andric continue;
12660b57cec5SDimitry Andric for (User *U : AE->users()) {
12670b57cec5SDimitry Andric Instruction *UI = cast<Instruction>(U);
12680b57cec5SDimitry Andric if (TheLoop->contains(UI))
12690b57cec5SDimitry Andric continue;
1270af732203SDimitry Andric LLVM_DEBUG(
1271af732203SDimitry Andric dbgs()
1272af732203SDimitry Andric << "LV: Cannot fold tail by masking, loop has an outside user for "
1273af732203SDimitry Andric << *UI << "\n");
12740b57cec5SDimitry Andric return false;
12750b57cec5SDimitry Andric }
12760b57cec5SDimitry Andric }
12770b57cec5SDimitry Andric
12780b57cec5SDimitry Andric // The list of pointers that we can safely read and write to remains empty.
12790b57cec5SDimitry Andric SmallPtrSet<Value *, 8> SafePointers;
12800b57cec5SDimitry Andric
1281af732203SDimitry Andric SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
1282af732203SDimitry Andric SmallPtrSet<Instruction *, 8> TmpConditionalAssumes;
1283af732203SDimitry Andric
12840b57cec5SDimitry Andric // Check and mark all blocks for predication, including those that ordinarily
12850b57cec5SDimitry Andric // do not need predication such as the header block.
12860b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
1287af732203SDimitry Andric if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
1288af732203SDimitry Andric TmpConditionalAssumes)) {
1289af732203SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
12900b57cec5SDimitry Andric return false;
12910b57cec5SDimitry Andric }
12920b57cec5SDimitry Andric }
12930b57cec5SDimitry Andric
12940b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1295af732203SDimitry Andric
1296af732203SDimitry Andric MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
1297af732203SDimitry Andric ConditionalAssumes.insert(TmpConditionalAssumes.begin(),
1298af732203SDimitry Andric TmpConditionalAssumes.end());
1299af732203SDimitry Andric
13000b57cec5SDimitry Andric return true;
13010b57cec5SDimitry Andric }
13020b57cec5SDimitry Andric
13030b57cec5SDimitry Andric } // namespace llvm
1304