//===- LoopVectorizationLegality.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides loop vectorization legality analysis. Original code
// resided in LoopVectorize.cpp for a long time.
//
// At this point, it is implemented as a utility class, not as an analysis
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//

#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"

using namespace llvm;
using namespace PatternMatch;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

static cl::opt<bool>
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                       cl::desc("Enable if-conversion during vectorization."));

static cl::opt<bool>
    AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
                           cl::desc("Enable recognition of non-constant strided "
                                    "pointer induction variables."));

namespace llvm {
cl::opt<bool>
    HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
                         cl::desc("Allow enabling loop hints to reorder "
                                  "FP operations during vectorization."));
}

// TODO: Move size-based thresholds out of legality checking, make cost based
// decisions instead of hard thresholds.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed."));

static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed with a "
             "vectorize(enable) pragma"));

static cl::opt<LoopVectorizeHints::ScalableForceKind>
    ForceScalableVectorization(
        "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
        cl::Hidden,
        cl::desc("Control whether the compiler can use scalable vectors to "
                 "vectorize a loop"),
        cl::values(
            clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
                       "Scalable vectorization is disabled."),
            clEnumValN(
                LoopVectorizeHints::SK_PreferScalable, "preferred",
                "Scalable vectorization is available and favored when the "
                "cost is inconclusive."),
            clEnumValN(
                LoopVectorizeHints::SK_PreferScalable, "on",
                "Scalable vectorization is available and favored when the "
                "cost is inconclusive.")));

/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;

namespace llvm {

bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  switch (Kind) {
  case HK_WIDTH:
    return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
  case HK_INTERLEAVE:
    return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
  case HK_FORCE:
    return (Val <= 1);
  case HK_ISVECTORIZED:
  case HK_PREDICATE:
  case HK_SCALABLE:
    return (Val == 0 || Val == 1);
  }
  return false;
}

LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE,
                                       const TargetTransformInfo *TTI)
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
      TheLoop(L), ORE(ORE) {
  // Populate values with existing loop metadata.
  getHintsFromMetadata();

  // force-vector-interleave overrides DisableInterleaving.
  if (VectorizerParams::isInterleaveForced())
    Interleave.Value = VectorizerParams::VectorizationInterleave;

  // If the metadata doesn't explicitly specify whether to enable scalable
  // vectorization, then decide based on the following criteria (increasing
  // level of priority):
  //  - Target default
  //  - Metadata width
  //  - Force option (always overrides)
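  // For example, if the target reports enableScalableVectorization() but the
  // metadata only sets a width (e.g. llvm.loop.vectorize.width = 4) with no
  // scalable hint, the request is treated as fixed-width only.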
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
    if (TTI)
      Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
                                                          : SK_FixedWidthOnly;

    if (Width.Value)
      // If the width is set, but the metadata says nothing about the scalable
      // property, then assume it concerns only a fixed-width UserVF.
      // If width is not set, the flag takes precedence.
      Scalable.Value = SK_FixedWidthOnly;
  }

  // If the flag is set to force any use of scalable vectors, override the loop
  // hints.
  if (ForceScalableVectorization.getValue() !=
      LoopVectorizeHints::SK_Unspecified)
    Scalable.Value = ForceScalableVectorization.getValue();

  // Scalable vectorization is disabled if no preference is specified.
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
    Scalable.Value = SK_FixedWidthOnly;

  if (IsVectorized.Value != 1)
    // If the vectorization width and interleaving count are both 1 then
    // consider the loop to have been already vectorized because there's
    // nothing more that we can do.
    IsVectorized.Value =
        getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
             << "LV: Interleaving disabled by the pass manager\n");
}

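// Once a loop has been vectorized, rewrite its metadata so it is not
// vectorized again: the existing "llvm.loop.vectorize.*" and
// "llvm.loop.interleave.*" hints are dropped and "llvm.loop.isvectorized" is
// set to 1.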
void LoopVectorizeHints::setAlreadyVectorized() {
  LLVMContext &Context = TheLoop->getHeader()->getContext();

  MDNode *IsVectorizedMD = MDNode::get(
      Context,
      {MDString::get(Context, "llvm.loop.isvectorized"),
       ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
  MDNode *LoopID = TheLoop->getLoopID();
  MDNode *NewLoopID =
      makePostTransformationMetadata(Context, LoopID,
                                     {Twine(Prefix(), "vectorize.").str(),
                                      Twine(Prefix(), "interleave.").str()},
                                     {IsVectorizedMD});
  TheLoop->setLoopID(NewLoopID);

  // Update internal cache.
  IsVectorized.Value = 1;
}

bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
  if (getForce() == LoopVectorizeHints::FK_Disabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (getIsVectorized() == 1) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
    // FIXME: Add interleave.disable metadata. This will allow
    // vectorize.disable to be used without disabling the pass and errors
    // to differentiate between disabled vectorization and a width of 1.
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
                                        "AllDisabled", L->getStartLoc(),
                                        L->getHeader())
             << "loop not vectorized: vectorization and interleaving are "
                "explicitly disabled, or the loop has already been "
                "vectorized";
    });
    return false;
  }

  return true;
}

void LoopVectorizeHints::emitRemarkWithHints() const {
  using namespace ore;

  ORE.emit([&]() {
    if (Force.Value == LoopVectorizeHints::FK_Disabled)
      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "loop not vectorized: vectorization is explicitly disabled";
    else {
      OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
                                 TheLoop->getStartLoc(), TheLoop->getHeader());
      R << "loop not vectorized";
      if (Force.Value == LoopVectorizeHints::FK_Enabled) {
        R << " (Force=" << NV("Force", true);
        if (Width.Value != 0)
          R << ", Vector Width=" << NV("VectorWidth", getWidth());
        if (getInterleave() != 0)
          R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
        R << ")";
      }
      return R;
    }
  });
}

const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
  if (getWidth() == ElementCount::getFixed(1))
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Disabled)
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
    return LV_NAME;
  return OptimizationRemarkAnalysis::AlwaysPrint;
}

bool LoopVectorizeHints::allowReordering() const {
  // Allow the vectorizer to change the order of operations if enabling
  // loop hints are provided
  ElementCount EC = getWidth();
  return HintsAllowReordering &&
         (getForce() == LoopVectorizeHints::FK_Enabled ||
          EC.getKnownMinValue() > 1);
}

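// Loop hints are read from the loop's "llvm.loop" metadata node. As an
// illustration, a loop annotated with
//   !0 = distinct !{!0, !1, !2}
//   !1 = !{!"llvm.loop.vectorize.width", i32 4}
//   !2 = !{!"llvm.loop.vectorize.enable", i1 true}
// is picked up below as Width = 4 and Force = FK_Enabled.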
void LoopVectorizeHints::getHintsFromMetadata() {
  MDNode *LoopID = TheLoop->getLoopID();
  if (!LoopID)
    return;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
    const MDString *S = nullptr;
    SmallVector<Metadata *, 4> Args;

    // The expected hint is either an MDString or an MDNode whose first
    // operand is an MDString.
    if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
      if (!MD || MD->getNumOperands() == 0)
        continue;
      S = dyn_cast<MDString>(MD->getOperand(0));
      for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
        Args.push_back(MD->getOperand(i));
    } else {
      S = dyn_cast<MDString>(LoopID->getOperand(i));
      assert(Args.size() == 0 && "too many arguments for MDString");
    }

    if (!S)
      continue;

    // Check if the hint starts with the loop metadata prefix.
    StringRef Name = S->getString();
    if (Args.size() == 1)
      setHint(Name, Args[0]);
  }
}

void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
  if (!Name.starts_with(Prefix()))
    return;
  Name = Name.substr(Prefix().size(), StringRef::npos);

  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
  if (!C)
    return;
  unsigned Val = C->getZExtValue();

  Hint *Hints[] = {&Width, &Interleave, &Force,
                   &IsVectorized, &Predicate, &Scalable};
  for (auto *H : Hints) {
    if (Name == H->Name) {
      if (H->validate(Val))
        H->Value = Val;
      else
        LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
      break;
    }
  }
}

// Return true if the inner loop \p Lp is uniform with regard to the outer loop
// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
// executing the inner loop will execute the same iterations). This check is
// very constrained for now but it will be relaxed in the future. \p Lp is
// considered uniform if it meets all the following conditions:
//   1) it has a canonical IV (starting from 0 and with stride 1),
//   2) its latch terminator is a conditional branch and,
//   3) its latch condition is a compare instruction whose operands are the
//      canonical IV and an OuterLp invariant.
// This check doesn't take into account the uniformity of other conditions not
// related to the loop latch because they don't affect the loop uniformity.
//
// NOTE: We decided to keep all these checks and their associated documentation
// together so that we can easily have a picture of the currently supported
// loop nests. However, some of the current checks don't depend on \p OuterLp
// and would be redundantly executed for each \p Lp if we invoked this function
// for different candidate outer loops. This is not the case for now because we
// don't currently have the infrastructure to evaluate multiple candidate outer
// loops and \p OuterLp will be a fixed parameter while we only support explicit
// outer loop vectorization. It's also very likely that these checks go away
// before introducing the aforementioned infrastructure. However, if this is not
// the case, we should move the \p OuterLp independent checks to a separate
// function that is only executed once for each \p Lp.
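//
// As an illustration, in the nest below the inner loop is uniform with regard
// to the outer loop: its canonical IV j starts at 0 with stride 1 and its
// latch compares j against a bound N that is invariant in the outer loop, so
// every vector lane of the outer loop would execute the same inner iterations:
//   for (int i = 0; i < M; ++i)    // OuterLp
//     for (int j = 0; j < N; ++j)  // Lp
//       ...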
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");

  // If Lp is the outer loop, it's uniform by definition.
  if (Lp == OuterLp)
    return true;
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

  // 1.
  PHINode *IV = Lp->getCanonicalInductionVariable();
  if (!IV) {
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    return false;
  }

  // 2.
  BasicBlock *Latch = Lp->getLoopLatch();
  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBr || LatchBr->isUnconditional()) {
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    return false;
  }

  // 3.
  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
  if (!LatchCmp) {
    LLVM_DEBUG(
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    return false;
  }

  Value *CondOp0 = LatchCmp->getOperand(0);
  Value *CondOp1 = LatchCmp->getOperand(1);
  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
      !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
    return false;
  }

  return true;
}

// Return true if \p Lp and all its nested loops are uniform with regard to \p
// OuterLp.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
  if (!isUniformLoop(Lp, OuterLp))
    return false;

  // Check if nested loops are uniform.
  for (Loop *SubLp : *Lp)
    if (!isUniformLoopNest(SubLp, OuterLp))
      return false;

  return true;
}

static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
  if (Ty->isPointerTy())
    return DL.getIntPtrType(Ty);

  // It is possible that chars or shorts overflow when we ask for the loop's
  // trip count; work around this by widening the type.
  if (Ty->getScalarSizeInBits() < 32)
    return Type::getInt32Ty(Ty->getContext());

  return Ty;
}

static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
  Ty0 = convertPointerToIntegerType(DL, Ty0);
  Ty1 = convertPointerToIntegerType(DL, Ty1);
  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
    return Ty0;
  return Ty1;
}

/// Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
                               SmallPtrSetImpl<Value *> &AllowedExit) {
  // Reductions, inductions and non-header phis are allowed to have exit
  // users. All other instructions must not have external users.
  if (!AllowedExit.count(Inst))
    // Check that all of the users of the loop are inside the BB.
    for (User *U : Inst->users()) {
      Instruction *UI = cast<Instruction>(U);
      // This user may be a reduction exit value.
      if (!TheLoop->contains(UI)) {
        LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
        return true;
      }
    }
  return false;
}

/// Returns true if A and B have the same pointer operand or the same SCEV
/// for their address.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
                               StoreInst *B) {
  // Compare the store instructions themselves.
  if (A == B)
    return true;

  // Otherwise compare pointer operands.
  Value *APtr = A->getPointerOperand();
  Value *BPtr = B->getPointerOperand();
  if (APtr == BPtr)
    return true;

  // Otherwise compare address SCEVs.
  if (SE->getSCEV(APtr) == SE->getSCEV(BPtr))
    return true;

  return false;
}

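// Returns 1 or -1 when \p Ptr is known to advance by exactly one element of
// \p AccessTy per iteration of TheLoop (forward or reverse consecutive
// access), and 0 otherwise. SCEV predicates needed to prove a unit stride are
// only added when not optimizing for size.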
int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                Value *Ptr) const {
  // FIXME: Currently, the set of symbolic strides is sometimes queried before
  // it's collected. This happens from canVectorizeWithIfConvert, when the
  // pointer is checked to reference consecutive elements suitable for a
  // masked access.
  const auto &Strides =
      LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();

  Function *F = TheLoop->getHeader()->getParent();
  bool OptForSize = F->hasOptSize() ||
                    llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
                                                PGSOQueryType::IRPass);
  bool CanAddPredicate = !OptForSize;
  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
                            CanAddPredicate, false).value_or(0);
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}

bool LoopVectorizationLegality::isInvariant(Value *V) const {
  return LAI->isInvariant(V);
}

namespace {
/// A rewriter to build the SCEVs for each of the VF lanes in the expected
/// vectorized loop, which can then be compared to detect their uniformity.
/// This is done by replacing the AddRec SCEVs of the original scalar loop
/// (TheLoop) with new AddRecs where the step is multiplied by StepMultiplier
/// and Offset * Step is added. Also checks if all sub-expressions are
/// analyzable w.r.t. uniformity.
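///
/// For example, with StepMultiplier = VF = 4 the AddRec {A,+,S} is rewritten
/// to {A,+,4*S} for lane 0 (Offset = 0) and to {A+2*S,+,4*S} for lane 2
/// (Offset = 2); a value is treated as uniform if the rewritten expressions
/// for all lanes simplify to the same SCEV.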
485fe013be4SDimitry Andric class SCEVAddRecForUniformityRewriter
486fe013be4SDimitry Andric : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
487fe013be4SDimitry Andric /// Multiplier to be applied to the step of AddRecs in TheLoop.
488fe013be4SDimitry Andric unsigned StepMultiplier;
489fe013be4SDimitry Andric
490fe013be4SDimitry Andric /// Offset to be added to the AddRecs in TheLoop.
491fe013be4SDimitry Andric unsigned Offset;
492fe013be4SDimitry Andric
493fe013be4SDimitry Andric /// Loop for which to rewrite AddRecsFor.
494fe013be4SDimitry Andric Loop *TheLoop;
495fe013be4SDimitry Andric
496fe013be4SDimitry Andric /// Is any sub-expressions not analyzable w.r.t. uniformity?
497fe013be4SDimitry Andric bool CannotAnalyze = false;
498fe013be4SDimitry Andric
canAnalyze() const499fe013be4SDimitry Andric bool canAnalyze() const { return !CannotAnalyze; }
500fe013be4SDimitry Andric
501fe013be4SDimitry Andric public:
SCEVAddRecForUniformityRewriter(ScalarEvolution & SE,unsigned StepMultiplier,unsigned Offset,Loop * TheLoop)502fe013be4SDimitry Andric SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
503fe013be4SDimitry Andric unsigned Offset, Loop *TheLoop)
504fe013be4SDimitry Andric : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
505fe013be4SDimitry Andric TheLoop(TheLoop) {}
506fe013be4SDimitry Andric
visitAddRecExpr(const SCEVAddRecExpr * Expr)507fe013be4SDimitry Andric const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
508fe013be4SDimitry Andric assert(Expr->getLoop() == TheLoop &&
509fe013be4SDimitry Andric "addrec outside of TheLoop must be invariant and should have been "
510fe013be4SDimitry Andric "handled earlier");
511fe013be4SDimitry Andric // Build a new AddRec by multiplying the step by StepMultiplier and
512fe013be4SDimitry Andric // incrementing the start by Offset * step.
513fe013be4SDimitry Andric Type *Ty = Expr->getType();
514fe013be4SDimitry Andric auto *Step = Expr->getStepRecurrence(SE);
515fe013be4SDimitry Andric if (!SE.isLoopInvariant(Step, TheLoop)) {
516fe013be4SDimitry Andric CannotAnalyze = true;
517fe013be4SDimitry Andric return Expr;
518fe013be4SDimitry Andric }
519fe013be4SDimitry Andric auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
520fe013be4SDimitry Andric auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
521fe013be4SDimitry Andric auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
522fe013be4SDimitry Andric return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
523fe013be4SDimitry Andric }
524fe013be4SDimitry Andric
visit(const SCEV * S)525fe013be4SDimitry Andric const SCEV *visit(const SCEV *S) {
526fe013be4SDimitry Andric if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
527fe013be4SDimitry Andric return S;
528fe013be4SDimitry Andric return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
529fe013be4SDimitry Andric }
530fe013be4SDimitry Andric
visitUnknown(const SCEVUnknown * S)531fe013be4SDimitry Andric const SCEV *visitUnknown(const SCEVUnknown *S) {
532fe013be4SDimitry Andric if (SE.isLoopInvariant(S, TheLoop))
533fe013be4SDimitry Andric return S;
534fe013be4SDimitry Andric // The value could vary across iterations.
535fe013be4SDimitry Andric CannotAnalyze = true;
536fe013be4SDimitry Andric return S;
537fe013be4SDimitry Andric }
538fe013be4SDimitry Andric
visitCouldNotCompute(const SCEVCouldNotCompute * S)539fe013be4SDimitry Andric const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
540fe013be4SDimitry Andric // Could not analyze the expression.
541fe013be4SDimitry Andric CannotAnalyze = true;
542fe013be4SDimitry Andric return S;
543fe013be4SDimitry Andric }
544fe013be4SDimitry Andric
rewrite(const SCEV * S,ScalarEvolution & SE,unsigned StepMultiplier,unsigned Offset,Loop * TheLoop)545fe013be4SDimitry Andric static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
546fe013be4SDimitry Andric unsigned StepMultiplier, unsigned Offset,
547fe013be4SDimitry Andric Loop *TheLoop) {
548fe013be4SDimitry Andric /// Bail out if the expression does not contain an UDiv expression.
549fe013be4SDimitry Andric /// Uniform values which are not loop invariant require operations to strip
550fe013be4SDimitry Andric /// out the lowest bits. For now just look for UDivs and use it to avoid
551fe013be4SDimitry Andric /// re-writing UDIV-free expressions for other lanes to limit compile time.
552fe013be4SDimitry Andric if (!SCEVExprContains(S,
553fe013be4SDimitry Andric [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
554fe013be4SDimitry Andric return SE.getCouldNotCompute();
555fe013be4SDimitry Andric
556fe013be4SDimitry Andric SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
557fe013be4SDimitry Andric TheLoop);
558fe013be4SDimitry Andric const SCEV *Result = Rewriter.visit(S);
559fe013be4SDimitry Andric
560fe013be4SDimitry Andric if (Rewriter.canAnalyze())
561fe013be4SDimitry Andric return Result;
562fe013be4SDimitry Andric return SE.getCouldNotCompute();
563fe013be4SDimitry Andric }
564fe013be4SDimitry Andric };
565fe013be4SDimitry Andric
566fe013be4SDimitry Andric } // namespace
567fe013be4SDimitry Andric
isUniform(Value * V,ElementCount VF) const568fe013be4SDimitry Andric bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
569fe013be4SDimitry Andric if (isInvariant(V))
570fe013be4SDimitry Andric return true;
571fe013be4SDimitry Andric if (VF.isScalable())
572fe013be4SDimitry Andric return false;
573fe013be4SDimitry Andric if (VF.isScalar())
574fe013be4SDimitry Andric return true;
575fe013be4SDimitry Andric
576fe013be4SDimitry Andric // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
577fe013be4SDimitry Andric // never considered uniform.
578fe013be4SDimitry Andric auto *SE = PSE.getSE();
579fe013be4SDimitry Andric if (!SE->isSCEVable(V->getType()))
580fe013be4SDimitry Andric return false;
581fe013be4SDimitry Andric const SCEV *S = SE->getSCEV(V);
582fe013be4SDimitry Andric
583fe013be4SDimitry Andric // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
584fe013be4SDimitry Andric // lane 0 matches the expressions for all other lanes.
585fe013be4SDimitry Andric unsigned FixedVF = VF.getKnownMinValue();
586fe013be4SDimitry Andric const SCEV *FirstLaneExpr =
587fe013be4SDimitry Andric SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
588fe013be4SDimitry Andric if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
589fe013be4SDimitry Andric return false;
590fe013be4SDimitry Andric
591fe013be4SDimitry Andric // Make sure the expressions for lanes FixedVF-1..1 match the expression for
592fe013be4SDimitry Andric // lane 0. We check lanes in reverse order for compile-time, as frequently
593fe013be4SDimitry Andric // checking the last lane is sufficient to rule out uniformity.
594fe013be4SDimitry Andric return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
595fe013be4SDimitry Andric const SCEV *IthLaneExpr =
596fe013be4SDimitry Andric SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
597fe013be4SDimitry Andric return FirstLaneExpr == IthLaneExpr;
598fe013be4SDimitry Andric });
599fe013be4SDimitry Andric }
600fe013be4SDimitry Andric
isUniformMemOp(Instruction & I,ElementCount VF) const601fe013be4SDimitry Andric bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
602fe013be4SDimitry Andric ElementCount VF) const {
603bdd1243dSDimitry Andric Value *Ptr = getLoadStorePointerOperand(&I);
604bdd1243dSDimitry Andric if (!Ptr)
605bdd1243dSDimitry Andric return false;
606bdd1243dSDimitry Andric // Note: There's nothing inherent which prevents predicated loads and
607bdd1243dSDimitry Andric // stores from being uniform. The current lowering simply doesn't handle
608bdd1243dSDimitry Andric // it; in particular, the cost model distinguishes scatter/gather from
609bdd1243dSDimitry Andric // scalar w/predication, and we currently rely on the scalar path.
610fe013be4SDimitry Andric return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
611bdd1243dSDimitry Andric }
612bdd1243dSDimitry Andric
canVectorizeOuterLoop()6130b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeOuterLoop() {
614e8d8bef9SDimitry Andric assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
6150b57cec5SDimitry Andric // Store the result and return it at the end instead of exiting early, in case
6160b57cec5SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
6170b57cec5SDimitry Andric bool Result = true;
6180b57cec5SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
6190b57cec5SDimitry Andric
6200b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
6210b57cec5SDimitry Andric // Check whether the BB terminator is a BranchInst. Any other terminator is
6220b57cec5SDimitry Andric // not supported yet.
6230b57cec5SDimitry Andric auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
6240b57cec5SDimitry Andric if (!Br) {
6250b57cec5SDimitry Andric reportVectorizationFailure("Unsupported basic block terminator",
6260b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
6278bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
6280b57cec5SDimitry Andric if (DoExtraAnalysis)
6290b57cec5SDimitry Andric Result = false;
6300b57cec5SDimitry Andric else
6310b57cec5SDimitry Andric return false;
6320b57cec5SDimitry Andric }
6330b57cec5SDimitry Andric
6340b57cec5SDimitry Andric // Check whether the BranchInst is a supported one. Only unconditional
6350b57cec5SDimitry Andric // branches, conditional branches with an outer loop invariant condition or
6360b57cec5SDimitry Andric // backedges are supported.
6370b57cec5SDimitry Andric // FIXME: We skip these checks when VPlan predication is enabled as we
6380b57cec5SDimitry Andric // want to allow divergent branches. This whole check will be removed
6390b57cec5SDimitry Andric // once VPlan predication is on by default.
64081ad6265SDimitry Andric if (Br && Br->isConditional() &&
6410b57cec5SDimitry Andric !TheLoop->isLoopInvariant(Br->getCondition()) &&
6420b57cec5SDimitry Andric !LI->isLoopHeader(Br->getSuccessor(0)) &&
6430b57cec5SDimitry Andric !LI->isLoopHeader(Br->getSuccessor(1))) {
6440b57cec5SDimitry Andric reportVectorizationFailure("Unsupported conditional branch",
6450b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
6468bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
6470b57cec5SDimitry Andric if (DoExtraAnalysis)
6480b57cec5SDimitry Andric Result = false;
6490b57cec5SDimitry Andric else
6500b57cec5SDimitry Andric return false;
6510b57cec5SDimitry Andric }
6520b57cec5SDimitry Andric }
6530b57cec5SDimitry Andric
6540b57cec5SDimitry Andric // Check whether inner loops are uniform. At this point, we only support
6550b57cec5SDimitry Andric // simple outer loops scenarios with uniform nested loops.
6560b57cec5SDimitry Andric if (!isUniformLoopNest(TheLoop /*loop nest*/,
6570b57cec5SDimitry Andric TheLoop /*context outer loop*/)) {
6580b57cec5SDimitry Andric reportVectorizationFailure("Outer loop contains divergent loops",
6590b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
6608bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
6610b57cec5SDimitry Andric if (DoExtraAnalysis)
6620b57cec5SDimitry Andric Result = false;
6630b57cec5SDimitry Andric else
6640b57cec5SDimitry Andric return false;
6650b57cec5SDimitry Andric }
6660b57cec5SDimitry Andric
6670b57cec5SDimitry Andric // Check whether we are able to set up outer loop induction.
6680b57cec5SDimitry Andric if (!setupOuterLoopInductions()) {
6690b57cec5SDimitry Andric reportVectorizationFailure("Unsupported outer loop Phi(s)",
6700b57cec5SDimitry Andric "Unsupported outer loop Phi(s)",
6718bcb0991SDimitry Andric "UnsupportedPhi", ORE, TheLoop);
6720b57cec5SDimitry Andric if (DoExtraAnalysis)
6730b57cec5SDimitry Andric Result = false;
6740b57cec5SDimitry Andric else
6750b57cec5SDimitry Andric return false;
6760b57cec5SDimitry Andric }
6770b57cec5SDimitry Andric
6780b57cec5SDimitry Andric return Result;
6790b57cec5SDimitry Andric }
6800b57cec5SDimitry Andric
addInductionPhi(PHINode * Phi,const InductionDescriptor & ID,SmallPtrSetImpl<Value * > & AllowedExit)6810b57cec5SDimitry Andric void LoopVectorizationLegality::addInductionPhi(
6820b57cec5SDimitry Andric PHINode *Phi, const InductionDescriptor &ID,
6830b57cec5SDimitry Andric SmallPtrSetImpl<Value *> &AllowedExit) {
6840b57cec5SDimitry Andric Inductions[Phi] = ID;
6850b57cec5SDimitry Andric
6860b57cec5SDimitry Andric // In case this induction also comes with casts that we know we can ignore
6870b57cec5SDimitry Andric // in the vectorized loop body, record them here. All casts could be recorded
6880b57cec5SDimitry Andric // here for ignoring, but suffices to record only the first (as it is the
6890b57cec5SDimitry Andric // only one that may bw used outside the cast sequence).
6900b57cec5SDimitry Andric const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
6910b57cec5SDimitry Andric if (!Casts.empty())
6920b57cec5SDimitry Andric InductionCastsToIgnore.insert(*Casts.begin());
6930b57cec5SDimitry Andric
6940b57cec5SDimitry Andric Type *PhiTy = Phi->getType();
6950b57cec5SDimitry Andric const DataLayout &DL = Phi->getModule()->getDataLayout();
6960b57cec5SDimitry Andric
6970b57cec5SDimitry Andric // Get the widest type.
6980b57cec5SDimitry Andric if (!PhiTy->isFloatingPointTy()) {
6990b57cec5SDimitry Andric if (!WidestIndTy)
7000b57cec5SDimitry Andric WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
7010b57cec5SDimitry Andric else
7020b57cec5SDimitry Andric WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
7030b57cec5SDimitry Andric }
7040b57cec5SDimitry Andric
7050b57cec5SDimitry Andric // Int inductions are special because we only allow one IV.
7060b57cec5SDimitry Andric if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
7070b57cec5SDimitry Andric ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
7080b57cec5SDimitry Andric isa<Constant>(ID.getStartValue()) &&
7090b57cec5SDimitry Andric cast<Constant>(ID.getStartValue())->isNullValue()) {
7100b57cec5SDimitry Andric
7110b57cec5SDimitry Andric // Use the phi node with the widest type as induction. Use the last
7120b57cec5SDimitry Andric // one if there are multiple (no good reason for doing this other
7130b57cec5SDimitry Andric // than it is expedient). We've checked that it begins at zero and
7140b57cec5SDimitry Andric // steps by one, so this is a canonical induction variable.
7150b57cec5SDimitry Andric if (!PrimaryInduction || PhiTy == WidestIndTy)
7160b57cec5SDimitry Andric PrimaryInduction = Phi;
7170b57cec5SDimitry Andric }
7180b57cec5SDimitry Andric
7190b57cec5SDimitry Andric // Both the PHI node itself, and the "post-increment" value feeding
7200b57cec5SDimitry Andric // back into the PHI node may have external users.
7210b57cec5SDimitry Andric // We can allow those uses, except if the SCEVs we have for them rely
7220b57cec5SDimitry Andric // on predicates that only hold within the loop, since allowing the exit
7230b57cec5SDimitry Andric // currently means re-using this SCEV outside the loop (see PR33706 for more
7240b57cec5SDimitry Andric // details).
72581ad6265SDimitry Andric if (PSE.getPredicate().isAlwaysTrue()) {
7260b57cec5SDimitry Andric AllowedExit.insert(Phi);
7270b57cec5SDimitry Andric AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
7280b57cec5SDimitry Andric }
7290b57cec5SDimitry Andric
7300b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
7310b57cec5SDimitry Andric }
7320b57cec5SDimitry Andric
setupOuterLoopInductions()7330b57cec5SDimitry Andric bool LoopVectorizationLegality::setupOuterLoopInductions() {
7340b57cec5SDimitry Andric BasicBlock *Header = TheLoop->getHeader();
7350b57cec5SDimitry Andric
7360b57cec5SDimitry Andric // Returns true if a given Phi is a supported induction.
7370b57cec5SDimitry Andric auto isSupportedPhi = [&](PHINode &Phi) -> bool {
7380b57cec5SDimitry Andric InductionDescriptor ID;
7390b57cec5SDimitry Andric if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
7400b57cec5SDimitry Andric ID.getKind() == InductionDescriptor::IK_IntInduction) {
7410b57cec5SDimitry Andric addInductionPhi(&Phi, ID, AllowedExit);
7420b57cec5SDimitry Andric return true;
7430b57cec5SDimitry Andric } else {
7440b57cec5SDimitry Andric // Bail out for any Phi in the outer loop header that is not a supported
7450b57cec5SDimitry Andric // induction.
7460b57cec5SDimitry Andric LLVM_DEBUG(
7470b57cec5SDimitry Andric dbgs()
7480b57cec5SDimitry Andric << "LV: Found unsupported PHI for outer loop vectorization.\n");
7490b57cec5SDimitry Andric return false;
7500b57cec5SDimitry Andric }
7510b57cec5SDimitry Andric };
7520b57cec5SDimitry Andric
7530b57cec5SDimitry Andric if (llvm::all_of(Header->phis(), isSupportedPhi))
7540b57cec5SDimitry Andric return true;
7550b57cec5SDimitry Andric else
7560b57cec5SDimitry Andric return false;
7570b57cec5SDimitry Andric }
7580b57cec5SDimitry Andric
7595ffd83dbSDimitry Andric /// Checks if a function is scalarizable according to the TLI, in
7605ffd83dbSDimitry Andric /// the sense that it should be vectorized and then expanded in
7615ffd83dbSDimitry Andric /// multiple scalar calls. This is represented in the
7625ffd83dbSDimitry Andric /// TLI via mappings that do not specify a vector name, as in the
7635ffd83dbSDimitry Andric /// following example:
7645ffd83dbSDimitry Andric ///
7655ffd83dbSDimitry Andric /// const VecDesc VecIntrinsics[] = {
7665ffd83dbSDimitry Andric /// {"llvm.phx.abs.i32", "", 4}
7675ffd83dbSDimitry Andric /// };
isTLIScalarize(const TargetLibraryInfo & TLI,const CallInst & CI)7685ffd83dbSDimitry Andric static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
7695ffd83dbSDimitry Andric const StringRef ScalarName = CI.getCalledFunction()->getName();
7705ffd83dbSDimitry Andric bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
7715ffd83dbSDimitry Andric // Check that all known VFs are not associated to a vector
7725ffd83dbSDimitry Andric // function, i.e. the vector name is emty.
773fe6060f1SDimitry Andric if (Scalarize) {
774fe6060f1SDimitry Andric ElementCount WidestFixedVF, WidestScalableVF;
775fe6060f1SDimitry Andric TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
776fe6060f1SDimitry Andric for (ElementCount VF = ElementCount::getFixed(2);
777fe6060f1SDimitry Andric ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
7785ffd83dbSDimitry Andric Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
779fe6060f1SDimitry Andric for (ElementCount VF = ElementCount::getScalable(1);
780fe6060f1SDimitry Andric ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
781fe6060f1SDimitry Andric Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
782fe6060f1SDimitry Andric assert((WidestScalableVF.isZero() || !Scalarize) &&
783fe6060f1SDimitry Andric "Caller may decide to scalarize a variant using a scalable VF");
7845ffd83dbSDimitry Andric }
7855ffd83dbSDimitry Andric return Scalarize;
7865ffd83dbSDimitry Andric }
7875ffd83dbSDimitry Andric
canVectorizeInstrs()7880b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeInstrs() {
7890b57cec5SDimitry Andric BasicBlock *Header = TheLoop->getHeader();
7900b57cec5SDimitry Andric
7910b57cec5SDimitry Andric // For each block in the loop.
7920b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
7930b57cec5SDimitry Andric // Scan the instructions in the block and look for hazards.
7940b57cec5SDimitry Andric for (Instruction &I : *BB) {
7950b57cec5SDimitry Andric if (auto *Phi = dyn_cast<PHINode>(&I)) {
7960b57cec5SDimitry Andric Type *PhiTy = Phi->getType();
7970b57cec5SDimitry Andric // Check that this PHI type is allowed.
7980b57cec5SDimitry Andric if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
7990b57cec5SDimitry Andric !PhiTy->isPointerTy()) {
8000b57cec5SDimitry Andric reportVectorizationFailure("Found a non-int non-pointer PHI",
8010b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
8028bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
8030b57cec5SDimitry Andric return false;
8040b57cec5SDimitry Andric }
8050b57cec5SDimitry Andric
8060b57cec5SDimitry Andric // If this PHINode is not in the header block, then we know that we
8070b57cec5SDimitry Andric // can convert it to select during if-conversion. No need to check if
8080b57cec5SDimitry Andric // the PHIs in this block are induction or reduction variables.
8090b57cec5SDimitry Andric if (BB != Header) {
8100b57cec5SDimitry Andric // Non-header phi nodes that have outside uses can be vectorized. Add
8110b57cec5SDimitry Andric // them to the list of allowed exits.
8120b57cec5SDimitry Andric // Unsafe cyclic dependencies with header phis are identified during
813bdd1243dSDimitry Andric // legalization for reduction, induction and fixed order
8140b57cec5SDimitry Andric // recurrences.
8150b57cec5SDimitry Andric AllowedExit.insert(&I);
8160b57cec5SDimitry Andric continue;
8170b57cec5SDimitry Andric }
8180b57cec5SDimitry Andric
8190b57cec5SDimitry Andric // We only allow if-converted PHIs with exactly two incoming values.
8200b57cec5SDimitry Andric if (Phi->getNumIncomingValues() != 2) {
8210b57cec5SDimitry Andric reportVectorizationFailure("Found an invalid PHI",
8220b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
8238bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop, Phi);
8240b57cec5SDimitry Andric return false;
8250b57cec5SDimitry Andric }
8260b57cec5SDimitry Andric
8270b57cec5SDimitry Andric RecurrenceDescriptor RedDes;
8280b57cec5SDimitry Andric if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
82981ad6265SDimitry Andric DT, PSE.getSE())) {
830fe6060f1SDimitry Andric Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
8310b57cec5SDimitry Andric AllowedExit.insert(RedDes.getLoopExitInstr());
8320b57cec5SDimitry Andric Reductions[Phi] = RedDes;
8330b57cec5SDimitry Andric continue;
8340b57cec5SDimitry Andric }
8350b57cec5SDimitry Andric
836fe013be4SDimitry Andric // We prevent matching non-constant strided pointer IVS to preserve
837fe013be4SDimitry Andric // historical vectorizer behavior after a generalization of the
838fe013be4SDimitry Andric // IVDescriptor code. The intent is to remove this check, but we
839fe013be4SDimitry Andric // have to fix issues around code quality for such loops first.
840fe013be4SDimitry Andric auto isDisallowedStridedPointerInduction =
841fe013be4SDimitry Andric [](const InductionDescriptor &ID) {
842fe013be4SDimitry Andric if (AllowStridedPointerIVs)
843fe013be4SDimitry Andric return false;
844fe013be4SDimitry Andric return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
845fe013be4SDimitry Andric ID.getConstIntStepValue() == nullptr;
846fe013be4SDimitry Andric };
847fe013be4SDimitry Andric
848bdd1243dSDimitry Andric // TODO: Instead of recording the AllowedExit, it would be good to
849bdd1243dSDimitry Andric // record the complementary set: NotAllowedExit. These include (but may
850bdd1243dSDimitry Andric // not be limited to):
8510b57cec5SDimitry Andric // 1. Reduction phis as they represent the one-before-last value, which
8520b57cec5SDimitry Andric // is not available when vectorized
8530b57cec5SDimitry Andric // 2. Induction phis and increment when SCEV predicates cannot be used
8540b57cec5SDimitry Andric // outside the loop - see addInductionPhi
8550b57cec5SDimitry Andric // 3. Non-Phis with outside uses when SCEV predicates cannot be used
8560b57cec5SDimitry Andric // outside the loop - see call to hasOutsideLoopUser in the non-phi
8570b57cec5SDimitry Andric // handling below
858bdd1243dSDimitry Andric // 4. FixedOrderRecurrence phis that can possibly be handled by
8590b57cec5SDimitry Andric // extraction.
8600b57cec5SDimitry Andric // By recording these, we can then reason about ways to vectorize each
8610b57cec5SDimitry Andric // of these NotAllowedExit.
8620b57cec5SDimitry Andric InductionDescriptor ID;
863fe013be4SDimitry Andric if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
864fe013be4SDimitry Andric !isDisallowedStridedPointerInduction(ID)) {
8650b57cec5SDimitry Andric addInductionPhi(Phi, ID, AllowedExit);
866fe6060f1SDimitry Andric Requirements->addExactFPMathInst(ID.getExactFPMathInst());
8670b57cec5SDimitry Andric continue;
8680b57cec5SDimitry Andric }
8690b57cec5SDimitry Andric
870fe013be4SDimitry Andric if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
8715ffd83dbSDimitry Andric AllowedExit.insert(Phi);
872bdd1243dSDimitry Andric FixedOrderRecurrences.insert(Phi);
8730b57cec5SDimitry Andric continue;
8740b57cec5SDimitry Andric }
8750b57cec5SDimitry Andric
8760b57cec5SDimitry Andric // As a last resort, coerce the PHI to a AddRec expression
8770b57cec5SDimitry Andric // and re-try classifying it a an induction PHI.
878fe013be4SDimitry Andric if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
879fe013be4SDimitry Andric !isDisallowedStridedPointerInduction(ID)) {
8800b57cec5SDimitry Andric addInductionPhi(Phi, ID, AllowedExit);
8810b57cec5SDimitry Andric continue;
8820b57cec5SDimitry Andric }
8830b57cec5SDimitry Andric
8840b57cec5SDimitry Andric reportVectorizationFailure("Found an unidentified PHI",
8850b57cec5SDimitry Andric "value that could not be identified as "
8860b57cec5SDimitry Andric "reduction is used outside the loop",
8878bcb0991SDimitry Andric "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
8880b57cec5SDimitry Andric return false;
8890b57cec5SDimitry Andric } // end of PHI handling
8900b57cec5SDimitry Andric
8910b57cec5SDimitry Andric // We handle calls that:
8920b57cec5SDimitry Andric // * Are debug info intrinsics.
8930b57cec5SDimitry Andric // * Have a mapping to an IR intrinsic.
8940b57cec5SDimitry Andric // * Have a vector version available.
8950b57cec5SDimitry Andric auto *CI = dyn_cast<CallInst>(&I);
8965ffd83dbSDimitry Andric
8970b57cec5SDimitry Andric if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
8980b57cec5SDimitry Andric !isa<DbgInfoIntrinsic>(CI) &&
8990b57cec5SDimitry Andric !(CI->getCalledFunction() && TLI &&
9005ffd83dbSDimitry Andric (!VFDatabase::getMappings(*CI).empty() ||
9015ffd83dbSDimitry Andric isTLIScalarize(*TLI, *CI)))) {
9020b57cec5SDimitry Andric // If the call is a recognized math library call, it is likely that
9030b57cec5SDimitry Andric // we can vectorize it given loosened floating-point constraints.
9040b57cec5SDimitry Andric LibFunc Func;
9050b57cec5SDimitry Andric bool IsMathLibCall =
9060b57cec5SDimitry Andric TLI && CI->getCalledFunction() &&
9070b57cec5SDimitry Andric CI->getType()->isFloatingPointTy() &&
9080b57cec5SDimitry Andric TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
9090b57cec5SDimitry Andric TLI->hasOptimizedCodeGen(Func);
9100b57cec5SDimitry Andric
9110b57cec5SDimitry Andric if (IsMathLibCall) {
9120b57cec5SDimitry Andric // TODO: Ideally, we should not use clang-specific language here,
9130b57cec5SDimitry Andric // but it's hard to provide meaningful yet generic advice.
9140b57cec5SDimitry Andric // Also, should this be guarded by allowExtraAnalysis() and/or be part
9150b57cec5SDimitry Andric // of the returned info from isFunctionVectorizable()?
9165ffd83dbSDimitry Andric reportVectorizationFailure(
9175ffd83dbSDimitry Andric "Found a non-intrinsic callsite",
9180b57cec5SDimitry Andric "library call cannot be vectorized. "
9190b57cec5SDimitry Andric "Try compiling with -fno-math-errno, -ffast-math, "
9200b57cec5SDimitry Andric "or similar flags",
9218bcb0991SDimitry Andric "CantVectorizeLibcall", ORE, TheLoop, CI);
9220b57cec5SDimitry Andric } else {
9230b57cec5SDimitry Andric reportVectorizationFailure("Found a non-intrinsic callsite",
9240b57cec5SDimitry Andric "call instruction cannot be vectorized",
9258bcb0991SDimitry Andric "CantVectorizeLibcall", ORE, TheLoop, CI);
9260b57cec5SDimitry Andric }
9270b57cec5SDimitry Andric return false;
9280b57cec5SDimitry Andric }
9290b57cec5SDimitry Andric
9300b57cec5SDimitry Andric // Some intrinsics have scalar arguments; such arguments must be uniform
9310b57cec5SDimitry Andric // (i.e. loop invariant) for the call to be vectorized.
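// (Illustrative) e.g. the i32 exponent operand of llvm.powi stays scalar
// in the widened call, so it must be loop invariant for the call to be
// vectorized.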
9320b57cec5SDimitry Andric if (CI) {
9330b57cec5SDimitry Andric auto *SE = PSE.getSE();
9340b57cec5SDimitry Andric Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
935349cc55cSDimitry Andric for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
93681ad6265SDimitry Andric if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
9370b57cec5SDimitry Andric if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
9380b57cec5SDimitry Andric reportVectorizationFailure("Found unvectorizable intrinsic",
9390b57cec5SDimitry Andric "intrinsic instruction cannot be vectorized",
9408bcb0991SDimitry Andric "CantVectorizeIntrinsic", ORE, TheLoop, CI);
9410b57cec5SDimitry Andric return false;
9420b57cec5SDimitry Andric }
9430b57cec5SDimitry Andric }
9440b57cec5SDimitry Andric }
9450b57cec5SDimitry Andric
946*c9157d92SDimitry Andric // If we found a vectorized variant of a function, note that so LV can
947*c9157d92SDimitry Andric // make better decisions about maximum VF.
948*c9157d92SDimitry Andric if (CI && !VFDatabase::getMappings(*CI).empty())
949*c9157d92SDimitry Andric VecCallVariantsFound = true;
950*c9157d92SDimitry Andric
9510b57cec5SDimitry Andric // Check that the instruction return type is vectorizable.
9520b57cec5SDimitry Andric // Also, we can't vectorize extractelement instructions.
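// (Illustrative) e.g. an in-loop load of an aggregate type such as
//   %s = load {i32, i32}, ptr %p
// produces a value that is not a valid vector element type and is
// rejected here.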
9530b57cec5SDimitry Andric if ((!VectorType::isValidElementType(I.getType()) &&
9540b57cec5SDimitry Andric !I.getType()->isVoidTy()) ||
9550b57cec5SDimitry Andric isa<ExtractElementInst>(I)) {
9560b57cec5SDimitry Andric reportVectorizationFailure("Found unvectorizable type",
9570b57cec5SDimitry Andric "instruction return type cannot be vectorized",
9588bcb0991SDimitry Andric "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
9590b57cec5SDimitry Andric return false;
9600b57cec5SDimitry Andric }
9610b57cec5SDimitry Andric
9620b57cec5SDimitry Andric // Check that the stored type is vectorizable.
9630b57cec5SDimitry Andric if (auto *ST = dyn_cast<StoreInst>(&I)) {
9640b57cec5SDimitry Andric Type *T = ST->getValueOperand()->getType();
9650b57cec5SDimitry Andric if (!VectorType::isValidElementType(T)) {
9660b57cec5SDimitry Andric reportVectorizationFailure("Store instruction cannot be vectorized",
9670b57cec5SDimitry Andric "store instruction cannot be vectorized",
9688bcb0991SDimitry Andric "CantVectorizeStore", ORE, TheLoop, ST);
9690b57cec5SDimitry Andric return false;
9700b57cec5SDimitry Andric }
9710b57cec5SDimitry Andric
9720b57cec5SDimitry Andric // For nontemporal stores, check that a nontemporal vector version is
9730b57cec5SDimitry Andric // supported on the target.
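// (Illustrative, target-dependent note:) on x86, for example, nontemporal
// vector stores such as MOVNTPS require the access to be aligned to the
// vector width, so an under-aligned '!nontemporal' store fails the
// isLegalNTStore check below.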
9740b57cec5SDimitry Andric if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
9750b57cec5SDimitry Andric // Arbitrarily try a vector of 2 elements.
976e8d8bef9SDimitry Andric auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
9770b57cec5SDimitry Andric assert(VecTy && "did not find vectorized version of stored type");
9785ffd83dbSDimitry Andric if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
9790b57cec5SDimitry Andric reportVectorizationFailure(
9800b57cec5SDimitry Andric "nontemporal store instruction cannot be vectorized",
9810b57cec5SDimitry Andric "nontemporal store instruction cannot be vectorized",
9828bcb0991SDimitry Andric "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
9830b57cec5SDimitry Andric return false;
9840b57cec5SDimitry Andric }
9850b57cec5SDimitry Andric }
9860b57cec5SDimitry Andric
9870b57cec5SDimitry Andric } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
9880b57cec5SDimitry Andric if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
9890b57cec5SDimitry Andric // For nontemporal loads, check that a nontemporal vector version is
9900b57cec5SDimitry Andric // supported on the target (arbitrarily try a vector of 2 elements).
991e8d8bef9SDimitry Andric auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
9920b57cec5SDimitry Andric assert(VecTy && "did not find vectorized version of load type");
9935ffd83dbSDimitry Andric if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
9940b57cec5SDimitry Andric reportVectorizationFailure(
9950b57cec5SDimitry Andric "nontemporal load instruction cannot be vectorized",
9960b57cec5SDimitry Andric "nontemporal load instruction cannot be vectorized",
9978bcb0991SDimitry Andric "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
9980b57cec5SDimitry Andric return false;
9990b57cec5SDimitry Andric }
10000b57cec5SDimitry Andric }
10010b57cec5SDimitry Andric
10020b57cec5SDimitry Andric // FP instructions can allow unsafe algebra and can thus be vectorized
10030b57cec5SDimitry Andric // by non-IEEE-754 compliant SIMD units.
10040b57cec5SDimitry Andric // This applies to floating-point math operations and calls, not memory
10050b57cec5SDimitry Andric // operations, shuffles, or casts, as they don't change precision or
10060b57cec5SDimitry Andric // semantics.
10070b57cec5SDimitry Andric } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
10080b57cec5SDimitry Andric !I.isFast()) {
10090b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
10100b57cec5SDimitry Andric Hints->setPotentiallyUnsafe();
10110b57cec5SDimitry Andric }
10120b57cec5SDimitry Andric
10130b57cec5SDimitry Andric // Reduction instructions are allowed to have exit users.
10140b57cec5SDimitry Andric // All other instructions must not have external users.
10150b57cec5SDimitry Andric if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
10160b57cec5SDimitry Andric // We can safely vectorize loops where instructions within the loop are
10170b57cec5SDimitry Andric // used outside the loop only if the SCEV predicate inside the loop is
10180b57cec5SDimitry Andric // the same as outside the loop. Allowing the exit means reusing the
10190b57cec5SDimitry Andric // SCEV outside the loop.
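// (Illustrative) e.g. the final value of an induction variable used after
// the loop can be recomputed from its SCEV outside the loop, but only
// when no extra runtime predicate is needed for that SCEV to hold.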
102081ad6265SDimitry Andric if (PSE.getPredicate().isAlwaysTrue()) {
10210b57cec5SDimitry Andric AllowedExit.insert(&I);
10220b57cec5SDimitry Andric continue;
10230b57cec5SDimitry Andric }
10240b57cec5SDimitry Andric reportVectorizationFailure("Value cannot be used outside the loop",
10250b57cec5SDimitry Andric "value cannot be used outside the loop",
10268bcb0991SDimitry Andric "ValueUsedOutsideLoop", ORE, TheLoop, &I);
10270b57cec5SDimitry Andric return false;
10280b57cec5SDimitry Andric }
10290b57cec5SDimitry Andric } // next instr.
10300b57cec5SDimitry Andric }
10310b57cec5SDimitry Andric
10320b57cec5SDimitry Andric if (!PrimaryInduction) {
10330b57cec5SDimitry Andric if (Inductions.empty()) {
10340b57cec5SDimitry Andric reportVectorizationFailure("Did not find one integer induction var",
10350b57cec5SDimitry Andric "loop induction variable could not be identified",
10368bcb0991SDimitry Andric "NoInductionVariable", ORE, TheLoop);
10370b57cec5SDimitry Andric return false;
10380b57cec5SDimitry Andric } else if (!WidestIndTy) {
10390b57cec5SDimitry Andric reportVectorizationFailure("Did not find one integer induction var",
10400b57cec5SDimitry Andric "integer loop induction variable could not be identified",
10418bcb0991SDimitry Andric "NoIntegerInductionVariable", ORE, TheLoop);
10420b57cec5SDimitry Andric return false;
10430b57cec5SDimitry Andric } else {
10440b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
10450b57cec5SDimitry Andric }
10460b57cec5SDimitry Andric }
10470b57cec5SDimitry Andric
10480b57cec5SDimitry Andric // Now that we know the widest induction type, check if the induction we
10490b57cec5SDimitry Andric // found has the same size. If it does not, unset it here and
10500b57cec5SDimitry Andric // InnerLoopVectorizer will create another one.
10510b57cec5SDimitry Andric if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
10520b57cec5SDimitry Andric PrimaryInduction = nullptr;
10530b57cec5SDimitry Andric
10540b57cec5SDimitry Andric return true;
10550b57cec5SDimitry Andric }
10560b57cec5SDimitry Andric
10570b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeMemory() {
1058bdd1243dSDimitry Andric LAI = &LAIs.getInfo(*TheLoop);
10590b57cec5SDimitry Andric const OptimizationRemarkAnalysis *LAR = LAI->getReport();
10600b57cec5SDimitry Andric if (LAR) {
10610b57cec5SDimitry Andric ORE->emit([&]() {
10620b57cec5SDimitry Andric return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
10630b57cec5SDimitry Andric "loop not vectorized: ", *LAR);
10640b57cec5SDimitry Andric });
10650b57cec5SDimitry Andric }
1066fe6060f1SDimitry Andric
10670b57cec5SDimitry Andric if (!LAI->canVectorizeMemory())
10680b57cec5SDimitry Andric return false;
10690b57cec5SDimitry Andric
107081ad6265SDimitry Andric // We can vectorize stores to an invariant address when the final reduction
107181ad6265SDimitry Andric // value is guaranteed to be stored at the end of the loop. Also, if the
107281ad6265SDimitry Andric // decision to vectorize the loop is made, runtime checks are added to make
107381ad6265SDimitry Andric // sure that the invariant address won't alias with any other objects.
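// (Illustrative) e.g. in
//   for (i = 0; i < n; ++i) { sum += a[i]; *p = sum; }
// the running reduction value is stored to the invariant address p every
// iteration; after vectorization only the final reduced value needs to be
// stored there.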
107481ad6265SDimitry Andric if (!LAI->getStoresToInvariantAddresses().empty()) {
1075bdd1243dSDimitry Andric // For each invariant address, check that the last store to it is
1076bdd1243dSDimitry Andric // unconditional and that the address is not calculated inside the loop.
107781ad6265SDimitry Andric for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1078bdd1243dSDimitry Andric if (!isInvariantStoreOfReduction(SI))
1079bdd1243dSDimitry Andric continue;
1080bdd1243dSDimitry Andric
1081bdd1243dSDimitry Andric if (blockNeedsPredication(SI->getParent())) {
108281ad6265SDimitry Andric reportVectorizationFailure(
108381ad6265SDimitry Andric "We don't allow storing to uniform addresses",
108481ad6265SDimitry Andric "write of conditional recurring variant value to a loop "
108581ad6265SDimitry Andric "invariant address could not be vectorized",
10868bcb0991SDimitry Andric "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
10870b57cec5SDimitry Andric return false;
10880b57cec5SDimitry Andric }
1089bdd1243dSDimitry Andric
1090bdd1243dSDimitry Andric // The invariant address should be defined outside of the loop. The LICM
1091bdd1243dSDimitry Andric // pass usually makes sure of that, but in rare cases it does not; we do
1092bdd1243dSDimitry Andric // not want to overcomplicate vectorization to support this case.
1093bdd1243dSDimitry Andric if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
1094bdd1243dSDimitry Andric if (TheLoop->contains(Ptr)) {
1095bdd1243dSDimitry Andric reportVectorizationFailure(
1096bdd1243dSDimitry Andric "Invariant address is calculated inside the loop",
1097bdd1243dSDimitry Andric "write to a loop invariant address could not "
1098bdd1243dSDimitry Andric "be vectorized",
1099bdd1243dSDimitry Andric "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1100bdd1243dSDimitry Andric return false;
1101bdd1243dSDimitry Andric }
1102bdd1243dSDimitry Andric }
110381ad6265SDimitry Andric }
110481ad6265SDimitry Andric
110581ad6265SDimitry Andric if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
110681ad6265SDimitry Andric // For each invariant address, check that its last stored value is the result
110781ad6265SDimitry Andric // of one of our reductions.
110881ad6265SDimitry Andric //
110981ad6265SDimitry Andric // We do not check whether dependences with loads exist because such loads
111081ad6265SDimitry Andric // are currently rejected earlier in LoopAccessInfo::analyzeLoop. If this
111181ad6265SDimitry Andric // behaviour changes, this code has to be modified.
111281ad6265SDimitry Andric ScalarEvolution *SE = PSE.getSE();
111381ad6265SDimitry Andric SmallVector<StoreInst *, 4> UnhandledStores;
111481ad6265SDimitry Andric for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
111581ad6265SDimitry Andric if (isInvariantStoreOfReduction(SI)) {
111681ad6265SDimitry Andric // Earlier stores to this address are effectively deadcode.
111781ad6265SDimitry Andric // With opaque pointers it is possible for one pointer to be used with
111881ad6265SDimitry Andric // different sizes of stored values:
111981ad6265SDimitry Andric // store i32 0, ptr %x
112081ad6265SDimitry Andric // store i8 0, ptr %x
112181ad6265SDimitry Andric // The latest store doesn't completely overwrite the first one in the
112281ad6265SDimitry Andric // example. That is why we have to make sure that the types of the
112381ad6265SDimitry Andric // stored values are the same.
112481ad6265SDimitry Andric // TODO: Check that the bitwidth of the unhandled store is smaller than the
112581ad6265SDimitry Andric // one that overwrites it and add a test.
112681ad6265SDimitry Andric erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
112781ad6265SDimitry Andric return storeToSameAddress(SE, SI, I) &&
112881ad6265SDimitry Andric I->getValueOperand()->getType() ==
112981ad6265SDimitry Andric SI->getValueOperand()->getType();
113081ad6265SDimitry Andric });
113181ad6265SDimitry Andric continue;
113281ad6265SDimitry Andric }
113381ad6265SDimitry Andric UnhandledStores.push_back(SI);
113481ad6265SDimitry Andric }
113581ad6265SDimitry Andric
113681ad6265SDimitry Andric bool IsOK = UnhandledStores.empty();
113781ad6265SDimitry Andric // TODO: we should also validate against InvariantMemSets.
113881ad6265SDimitry Andric if (!IsOK) {
113981ad6265SDimitry Andric reportVectorizationFailure(
114081ad6265SDimitry Andric "We don't allow storing to uniform addresses",
114181ad6265SDimitry Andric "write to a loop invariant address could not "
114281ad6265SDimitry Andric "be vectorized",
114381ad6265SDimitry Andric "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
114481ad6265SDimitry Andric return false;
114581ad6265SDimitry Andric }
114681ad6265SDimitry Andric }
114781ad6265SDimitry Andric }
1148fe6060f1SDimitry Andric
114981ad6265SDimitry Andric PSE.addPredicate(LAI->getPSE().getPredicate());
11500b57cec5SDimitry Andric return true;
11510b57cec5SDimitry Andric }
11520b57cec5SDimitry Andric
1153fe6060f1SDimitry Andric bool LoopVectorizationLegality::canVectorizeFPMath(
1154fe6060f1SDimitry Andric bool EnableStrictReductions) {
1155fe6060f1SDimitry Andric
1156fe6060f1SDimitry Andric // First check if there is any ExactFP math or if we allow reassociations
1157fe6060f1SDimitry Andric if (!Requirements->getExactFPInst() || Hints->allowReordering())
1158fe6060f1SDimitry Andric return true;
1159fe6060f1SDimitry Andric
1160fe6060f1SDimitry Andric // If the above is false, we have ExactFPMath & do not allow reordering.
1161fe6060f1SDimitry Andric // If the EnableStrictReductions flag is set, first check if we have any
1162fe6060f1SDimitry Andric // Exact FP induction vars, which we cannot vectorize.
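// (Illustrative sketch:) a reduction such as
//   for (i = 0; i < n; ++i) sum += a[i];   // fadd without 'reassoc'
// may still be vectorized as an in-loop ordered reduction, e.g. by using
// llvm.vector.reduce.fadd with the running scalar as the start value on
// every iteration, which preserves the sequential FP semantics.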
1163fe6060f1SDimitry Andric if (!EnableStrictReductions ||
1164fe6060f1SDimitry Andric any_of(getInductionVars(), [&](auto &Induction) -> bool {
1165fe6060f1SDimitry Andric InductionDescriptor IndDesc = Induction.second;
1166fe6060f1SDimitry Andric return IndDesc.getExactFPMathInst();
1167fe6060f1SDimitry Andric }))
1168fe6060f1SDimitry Andric return false;
1169fe6060f1SDimitry Andric
1170fe6060f1SDimitry Andric // We can now only vectorize if all reductions with Exact FP math also
1171fe6060f1SDimitry Andric // have the isOrdered flag set, which indicates that we can move the
1172fe6060f1SDimitry Andric // reduction operations in-loop.
1173fe6060f1SDimitry Andric return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1174fe6060f1SDimitry Andric const RecurrenceDescriptor &RdxDesc = Reduction.second;
1175fe6060f1SDimitry Andric return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1176fe6060f1SDimitry Andric }));
1177fe6060f1SDimitry Andric }
1178fe6060f1SDimitry Andric
117981ad6265SDimitry Andric bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
118081ad6265SDimitry Andric return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
118181ad6265SDimitry Andric const RecurrenceDescriptor &RdxDesc = Reduction.second;
118281ad6265SDimitry Andric return RdxDesc.IntermediateStore == SI;
118381ad6265SDimitry Andric });
118481ad6265SDimitry Andric }
118581ad6265SDimitry Andric
118681ad6265SDimitry Andric bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
118781ad6265SDimitry Andric return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
118881ad6265SDimitry Andric const RecurrenceDescriptor &RdxDesc = Reduction.second;
118981ad6265SDimitry Andric if (!RdxDesc.IntermediateStore)
119081ad6265SDimitry Andric return false;
119181ad6265SDimitry Andric
119281ad6265SDimitry Andric ScalarEvolution *SE = PSE.getSE();
119381ad6265SDimitry Andric Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
119481ad6265SDimitry Andric return V == InvariantAddress ||
119581ad6265SDimitry Andric SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
119681ad6265SDimitry Andric });
119781ad6265SDimitry Andric }
119881ad6265SDimitry Andric
11990eae32dcSDimitry Andric bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
12000b57cec5SDimitry Andric Value *In0 = const_cast<Value *>(V);
12010b57cec5SDimitry Andric PHINode *PN = dyn_cast_or_null<PHINode>(In0);
12020b57cec5SDimitry Andric if (!PN)
12030b57cec5SDimitry Andric return false;
12040b57cec5SDimitry Andric
12050b57cec5SDimitry Andric return Inductions.count(PN);
12060b57cec5SDimitry Andric }
12070b57cec5SDimitry Andric
12080eae32dcSDimitry Andric const InductionDescriptor *
12090eae32dcSDimitry Andric LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
12100eae32dcSDimitry Andric if (!isInductionPhi(Phi))
12110eae32dcSDimitry Andric return nullptr;
12120eae32dcSDimitry Andric auto &ID = getInductionVars().find(Phi)->second;
12130eae32dcSDimitry Andric if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
12140eae32dcSDimitry Andric ID.getKind() == InductionDescriptor::IK_FpInduction)
12150eae32dcSDimitry Andric return &ID;
12160eae32dcSDimitry Andric return nullptr;
12170eae32dcSDimitry Andric }
12180eae32dcSDimitry Andric
121981ad6265SDimitry Andric const InductionDescriptor *
122081ad6265SDimitry Andric LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
122181ad6265SDimitry Andric if (!isInductionPhi(Phi))
122281ad6265SDimitry Andric return nullptr;
122381ad6265SDimitry Andric auto &ID = getInductionVars().find(Phi)->second;
122481ad6265SDimitry Andric if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
122581ad6265SDimitry Andric return &ID;
122681ad6265SDimitry Andric return nullptr;
122781ad6265SDimitry Andric }
122881ad6265SDimitry Andric
12290eae32dcSDimitry Andric bool LoopVectorizationLegality::isCastedInductionVariable(
12300eae32dcSDimitry Andric const Value *V) const {
12310b57cec5SDimitry Andric auto *Inst = dyn_cast<Instruction>(V);
12320b57cec5SDimitry Andric return (Inst && InductionCastsToIgnore.count(Inst));
12330b57cec5SDimitry Andric }
12340b57cec5SDimitry Andric
12350eae32dcSDimitry Andric bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
12360b57cec5SDimitry Andric return isInductionPhi(V) || isCastedInductionVariable(V);
12370b57cec5SDimitry Andric }
12380b57cec5SDimitry Andric
1239bdd1243dSDimitry Andric bool LoopVectorizationLegality::isFixedOrderRecurrence(
12400eae32dcSDimitry Andric const PHINode *Phi) const {
1241bdd1243dSDimitry Andric return FixedOrderRecurrences.count(Phi);
12420b57cec5SDimitry Andric }
12430b57cec5SDimitry Andric
1244fe6060f1SDimitry Andric bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
12450b57cec5SDimitry Andric return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
12460b57cec5SDimitry Andric }
12470b57cec5SDimitry Andric
12480b57cec5SDimitry Andric bool LoopVectorizationLegality::blockCanBePredicated(
1249e8d8bef9SDimitry Andric BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
1250*c9157d92SDimitry Andric SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
12510b57cec5SDimitry Andric for (Instruction &I : *BB) {
12525ffd83dbSDimitry Andric // We can predicate blocks with calls to assume, as long as we drop them in
12535ffd83dbSDimitry Andric // case we flatten the CFG via predication.
12545ffd83dbSDimitry Andric if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
1255*c9157d92SDimitry Andric MaskedOp.insert(&I);
12565ffd83dbSDimitry Andric continue;
12575ffd83dbSDimitry Andric }
12585ffd83dbSDimitry Andric
1259e8d8bef9SDimitry Andric // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1260e8d8bef9SDimitry Andric // TODO: there might be cases that it should block the vectorization. Let's
1261e8d8bef9SDimitry Andric // ignore those for now.
1262e8d8bef9SDimitry Andric if (isa<NoAliasScopeDeclInst>(&I))
1263e8d8bef9SDimitry Andric continue;
1264e8d8bef9SDimitry Andric
1265fe013be4SDimitry Andric // We can allow masked calls if there's at least one vector variant, even
1266fe013be4SDimitry Andric // if we end up scalarizing due to the cost model calculations.
1267fe013be4SDimitry Andric // TODO: Allow other calls if they have appropriate attributes... readonly
1268fe013be4SDimitry Andric // and argmemonly?
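// (Illustrative) a masked variant is typically advertised via the
// "vector-function-abi-variant" attribute with a mangled name whose mask
// token is 'M', e.g. "_ZGVnM4v_foo(foo_vec)"; unmasked 'N' variants alone
// do not qualify here.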
1269fe013be4SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&I))
1270fe013be4SDimitry Andric if (VFDatabase::hasMaskedVariant(*CI)) {
1271fe013be4SDimitry Andric MaskedOp.insert(CI);
1272fe013be4SDimitry Andric continue;
1273fe013be4SDimitry Andric }
1274fe013be4SDimitry Andric
1275bdd1243dSDimitry Andric // Loads are handled via masking (or speculated if safe to do so).
1276bdd1243dSDimitry Andric if (auto *LI = dyn_cast<LoadInst>(&I)) {
1277bdd1243dSDimitry Andric if (!SafePtrs.count(LI->getPointerOperand()))
12780b57cec5SDimitry Andric MaskedOp.insert(LI);
12790b57cec5SDimitry Andric continue;
12800b57cec5SDimitry Andric }
12810b57cec5SDimitry Andric
12820b57cec5SDimitry Andric // Predicated store requires some form of masking:
12830b57cec5SDimitry Andric // 1) masked store HW instruction,
12840b57cec5SDimitry Andric // 2) emulation via load-blend-store (only if safe and legal to do so,
12850b57cec5SDimitry Andric // be aware of the race conditions), or
12860b57cec5SDimitry Andric // 3) element-by-element predicate check and scalar store.
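// (Illustrative) option 1) is typically emitted as a call to the
// llvm.masked.store intrinsic once the block's predicate mask is known.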
1287bdd1243dSDimitry Andric if (auto *SI = dyn_cast<StoreInst>(&I)) {
12880b57cec5SDimitry Andric MaskedOp.insert(SI);
12890b57cec5SDimitry Andric continue;
12900b57cec5SDimitry Andric }
1291bdd1243dSDimitry Andric
1292bdd1243dSDimitry Andric if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
12930b57cec5SDimitry Andric return false;
12940b57cec5SDimitry Andric }
12950b57cec5SDimitry Andric
12960b57cec5SDimitry Andric return true;
12970b57cec5SDimitry Andric }
12980b57cec5SDimitry Andric
12990b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
13000b57cec5SDimitry Andric if (!EnableIfConversion) {
13010b57cec5SDimitry Andric reportVectorizationFailure("If-conversion is disabled",
13020b57cec5SDimitry Andric "if-conversion is disabled",
13038bcb0991SDimitry Andric "IfConversionDisabled",
13048bcb0991SDimitry Andric ORE, TheLoop);
13050b57cec5SDimitry Andric return false;
13060b57cec5SDimitry Andric }
13070b57cec5SDimitry Andric
13080b57cec5SDimitry Andric assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
13090b57cec5SDimitry Andric
13108bcb0991SDimitry Andric // A list of pointers which are known to be dereferenceable within the scope
13118bcb0991SDimitry Andric // of the loop body for each iteration of the loop which executes. That is,
13128bcb0991SDimitry Andric // the memory pointed to can be dereferenced (with the access size implied by
13138bcb0991SDimitry Andric // the value's type) unconditionally within the loop header without
13148bcb0991SDimitry Andric // introducing a new fault.
13155ffd83dbSDimitry Andric SmallPtrSet<Value *, 8> SafePointers;
13160b57cec5SDimitry Andric
13170b57cec5SDimitry Andric // Collect safe addresses.
13180b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
13198bcb0991SDimitry Andric if (!blockNeedsPredication(BB)) {
13200b57cec5SDimitry Andric for (Instruction &I : *BB)
13210b57cec5SDimitry Andric if (auto *Ptr = getLoadStorePointerOperand(&I))
13225ffd83dbSDimitry Andric SafePointers.insert(Ptr);
13238bcb0991SDimitry Andric continue;
13248bcb0991SDimitry Andric }
13258bcb0991SDimitry Andric
13268bcb0991SDimitry Andric // For a block which requires predication, an address may be safe to access
13278bcb0991SDimitry Andric // in the loop w/o predication if we can prove dereferenceability facts
13288bcb0991SDimitry Andric // sufficient to ensure it'll never fault within the loop. For the moment,
13298bcb0991SDimitry Andric // we restrict this to loads; stores are more complicated due to
13308bcb0991SDimitry Andric // concurrency restrictions.
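// (Illustrative) e.g. the load of a[i] in 'if (c[i]) sum += a[i];' needs
// no mask if 'a' can be proven dereferenceable and sufficiently aligned
// for every iteration of the loop.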
13318bcb0991SDimitry Andric ScalarEvolution &SE = *PSE.getSE();
13328bcb0991SDimitry Andric for (Instruction &I : *BB) {
13338bcb0991SDimitry Andric LoadInst *LI = dyn_cast<LoadInst>(&I);
1334e8d8bef9SDimitry Andric if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1335bdd1243dSDimitry Andric isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
13365ffd83dbSDimitry Andric SafePointers.insert(LI->getPointerOperand());
13378bcb0991SDimitry Andric }
13380b57cec5SDimitry Andric }
13390b57cec5SDimitry Andric
13400b57cec5SDimitry Andric // Collect the blocks that need predication.
13410b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
13420b57cec5SDimitry Andric // We don't support switch statements inside loops.
13430b57cec5SDimitry Andric if (!isa<BranchInst>(BB->getTerminator())) {
13440b57cec5SDimitry Andric reportVectorizationFailure("Loop contains a switch statement",
13450b57cec5SDimitry Andric "loop contains a switch statement",
13468bcb0991SDimitry Andric "LoopContainsSwitch", ORE, TheLoop,
13478bcb0991SDimitry Andric BB->getTerminator());
13480b57cec5SDimitry Andric return false;
13490b57cec5SDimitry Andric }
13500b57cec5SDimitry Andric
13510b57cec5SDimitry Andric // We must be able to predicate all blocks that need to be predicated.
1352*c9157d92SDimitry Andric if (blockNeedsPredication(BB) &&
1353*c9157d92SDimitry Andric !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
13540b57cec5SDimitry Andric reportVectorizationFailure(
13550b57cec5SDimitry Andric "Control flow cannot be substituted for a select",
1356*c9157d92SDimitry Andric "control flow cannot be substituted for a select", "NoCFGForSelect",
1357*c9157d92SDimitry Andric ORE, TheLoop, BB->getTerminator());
13580b57cec5SDimitry Andric return false;
13590b57cec5SDimitry Andric }
13600b57cec5SDimitry Andric }
13610b57cec5SDimitry Andric
13620b57cec5SDimitry Andric // We can if-convert this loop.
13630b57cec5SDimitry Andric return true;
13640b57cec5SDimitry Andric }
13650b57cec5SDimitry Andric
13660b57cec5SDimitry Andric // Helper function to canVectorizeLoopNestCFG.
13670b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
13680b57cec5SDimitry Andric bool UseVPlanNativePath) {
1369e8d8bef9SDimitry Andric assert((UseVPlanNativePath || Lp->isInnermost()) &&
13700b57cec5SDimitry Andric "VPlan-native path is not enabled.");
13710b57cec5SDimitry Andric
13720b57cec5SDimitry Andric // TODO: ORE should be improved to show more accurate information when an
13730b57cec5SDimitry Andric // outer loop can't be vectorized because a nested loop is not understood or
13740b57cec5SDimitry Andric // legal. Something like: "outer_loop_location: loop not vectorized:
13750b57cec5SDimitry Andric // (inner_loop_location) loop control flow is not understood by vectorizer".
13760b57cec5SDimitry Andric
13770b57cec5SDimitry Andric // Store the result and return it at the end instead of exiting early, in case
13780b57cec5SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
13790b57cec5SDimitry Andric bool Result = true;
13800b57cec5SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
13810b57cec5SDimitry Andric
13820b57cec5SDimitry Andric // We must have a loop in canonical form. Loops with indirectbr in them cannot
13830b57cec5SDimitry Andric // be canonicalized.
13840b57cec5SDimitry Andric if (!Lp->getLoopPreheader()) {
13850b57cec5SDimitry Andric reportVectorizationFailure("Loop doesn't have a legal pre-header",
13860b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
13878bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
13880b57cec5SDimitry Andric if (DoExtraAnalysis)
13890b57cec5SDimitry Andric Result = false;
13900b57cec5SDimitry Andric else
13910b57cec5SDimitry Andric return false;
13920b57cec5SDimitry Andric }
13930b57cec5SDimitry Andric
13940b57cec5SDimitry Andric // We must have a single backedge.
13950b57cec5SDimitry Andric if (Lp->getNumBackEdges() != 1) {
13960b57cec5SDimitry Andric reportVectorizationFailure("The loop must have a single backedge",
13970b57cec5SDimitry Andric "loop control flow is not understood by vectorizer",
13988bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop);
13990b57cec5SDimitry Andric if (DoExtraAnalysis)
14000b57cec5SDimitry Andric Result = false;
14010b57cec5SDimitry Andric else
14020b57cec5SDimitry Andric return false;
14030b57cec5SDimitry Andric }
14040b57cec5SDimitry Andric
14050b57cec5SDimitry Andric return Result;
14060b57cec5SDimitry Andric }
14070b57cec5SDimitry Andric
14080b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
14090b57cec5SDimitry Andric Loop *Lp, bool UseVPlanNativePath) {
14100b57cec5SDimitry Andric // Store the result and return it at the end instead of exiting early, in case
14110b57cec5SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
14120b57cec5SDimitry Andric bool Result = true;
14130b57cec5SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
14140b57cec5SDimitry Andric if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
14150b57cec5SDimitry Andric if (DoExtraAnalysis)
14160b57cec5SDimitry Andric Result = false;
14170b57cec5SDimitry Andric else
14180b57cec5SDimitry Andric return false;
14190b57cec5SDimitry Andric }
14200b57cec5SDimitry Andric
14210b57cec5SDimitry Andric // Recursively check whether the loop control flow of nested loops is
14220b57cec5SDimitry Andric // understood.
14230b57cec5SDimitry Andric for (Loop *SubLp : *Lp)
14240b57cec5SDimitry Andric if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
14250b57cec5SDimitry Andric if (DoExtraAnalysis)
14260b57cec5SDimitry Andric Result = false;
14270b57cec5SDimitry Andric else
14280b57cec5SDimitry Andric return false;
14290b57cec5SDimitry Andric }
14300b57cec5SDimitry Andric
14310b57cec5SDimitry Andric return Result;
14320b57cec5SDimitry Andric }
14330b57cec5SDimitry Andric
14340b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
14350b57cec5SDimitry Andric // Store the result and return it at the end instead of exiting early, in case
14360b57cec5SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
14370b57cec5SDimitry Andric bool Result = true;
14380b57cec5SDimitry Andric
14390b57cec5SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
14400b57cec5SDimitry Andric // Check whether the loop-related control flow in the loop nest is expected by
14410b57cec5SDimitry Andric // vectorizer.
14420b57cec5SDimitry Andric if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
14430b57cec5SDimitry Andric if (DoExtraAnalysis)
14440b57cec5SDimitry Andric Result = false;
14450b57cec5SDimitry Andric else
14460b57cec5SDimitry Andric return false;
14470b57cec5SDimitry Andric }
14480b57cec5SDimitry Andric
14490b57cec5SDimitry Andric // We need to have a loop header.
14500b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
14510b57cec5SDimitry Andric << '\n');
14520b57cec5SDimitry Andric
14530b57cec5SDimitry Andric // Specific checks for outer loops. We skip the remaining legal checks at this
14540b57cec5SDimitry Andric // point because they don't support outer loops.
1455e8d8bef9SDimitry Andric if (!TheLoop->isInnermost()) {
14560b57cec5SDimitry Andric assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
14570b57cec5SDimitry Andric
14580b57cec5SDimitry Andric if (!canVectorizeOuterLoop()) {
14590b57cec5SDimitry Andric reportVectorizationFailure("Unsupported outer loop",
14600b57cec5SDimitry Andric "unsupported outer loop",
14618bcb0991SDimitry Andric "UnsupportedOuterLoop",
14628bcb0991SDimitry Andric ORE, TheLoop);
14630b57cec5SDimitry Andric // TODO: Implement DoExtraAnalysis when subsequent legal checks support
14640b57cec5SDimitry Andric // outer loops.
14650b57cec5SDimitry Andric return false;
14660b57cec5SDimitry Andric }
14670b57cec5SDimitry Andric
14680b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
14690b57cec5SDimitry Andric return Result;
14700b57cec5SDimitry Andric }
14710b57cec5SDimitry Andric
1472e8d8bef9SDimitry Andric assert(TheLoop->isInnermost() && "Inner loop expected.");
14730b57cec5SDimitry Andric // Check if we can if-convert non-single-bb loops.
14740b57cec5SDimitry Andric unsigned NumBlocks = TheLoop->getNumBlocks();
14750b57cec5SDimitry Andric if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
14760b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
14770b57cec5SDimitry Andric if (DoExtraAnalysis)
14780b57cec5SDimitry Andric Result = false;
14790b57cec5SDimitry Andric else
14800b57cec5SDimitry Andric return false;
14810b57cec5SDimitry Andric }
14820b57cec5SDimitry Andric
14830b57cec5SDimitry Andric // Check if we can vectorize the instructions and CFG in this loop.
14840b57cec5SDimitry Andric if (!canVectorizeInstrs()) {
14850b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
14860b57cec5SDimitry Andric if (DoExtraAnalysis)
14870b57cec5SDimitry Andric Result = false;
14880b57cec5SDimitry Andric else
14890b57cec5SDimitry Andric return false;
14900b57cec5SDimitry Andric }
14910b57cec5SDimitry Andric
14920b57cec5SDimitry Andric // Go over each instruction and look at memory deps.
14930b57cec5SDimitry Andric if (!canVectorizeMemory()) {
14940b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
14950b57cec5SDimitry Andric if (DoExtraAnalysis)
14960b57cec5SDimitry Andric Result = false;
14970b57cec5SDimitry Andric else
14980b57cec5SDimitry Andric return false;
14990b57cec5SDimitry Andric }
15000b57cec5SDimitry Andric
15010b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
15020b57cec5SDimitry Andric << (LAI->getRuntimePointerChecking()->Need
15030b57cec5SDimitry Andric ? " (with a runtime bound check)"
15040b57cec5SDimitry Andric : "")
15050b57cec5SDimitry Andric << "!\n");
15060b57cec5SDimitry Andric
15070b57cec5SDimitry Andric unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
15080b57cec5SDimitry Andric if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
15090b57cec5SDimitry Andric SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
15100b57cec5SDimitry Andric
151181ad6265SDimitry Andric if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
15120b57cec5SDimitry Andric reportVectorizationFailure("Too many SCEV checks needed",
15130b57cec5SDimitry Andric "Too many SCEV assumptions need to be made and checked at runtime",
15148bcb0991SDimitry Andric "TooManySCEVRunTimeChecks", ORE, TheLoop);
15150b57cec5SDimitry Andric if (DoExtraAnalysis)
15160b57cec5SDimitry Andric Result = false;
15170b57cec5SDimitry Andric else
15180b57cec5SDimitry Andric return false;
15190b57cec5SDimitry Andric }
15200b57cec5SDimitry Andric
15210b57cec5SDimitry Andric // Okay! We've done all the tests. If any have failed, return false. Otherwise
15220b57cec5SDimitry Andric // we can vectorize, and at this point we don't have any other mem analysis
15230b57cec5SDimitry Andric // which may limit our maximum vectorization factor, so just return true with
15240b57cec5SDimitry Andric // no restrictions.
15250b57cec5SDimitry Andric return Result;
15260b57cec5SDimitry Andric }
15270b57cec5SDimitry Andric
15288bcb0991SDimitry Andric bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
15290b57cec5SDimitry Andric
15300b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
15310b57cec5SDimitry Andric
15328bcb0991SDimitry Andric SmallPtrSet<const Value *, 8> ReductionLiveOuts;
15330b57cec5SDimitry Andric
1534bdd1243dSDimitry Andric for (const auto &Reduction : getReductionVars())
15358bcb0991SDimitry Andric ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
15368bcb0991SDimitry Andric
15378bcb0991SDimitry Andric // TODO: handle non-reduction outside users when tail is folded by masking.
15380b57cec5SDimitry Andric for (auto *AE : AllowedExit) {
15398bcb0991SDimitry Andric // Check that all users of allowed exit values are inside the loop or
15408bcb0991SDimitry Andric // are the live-out of a reduction.
15418bcb0991SDimitry Andric if (ReductionLiveOuts.count(AE))
15428bcb0991SDimitry Andric continue;
15430b57cec5SDimitry Andric for (User *U : AE->users()) {
15440b57cec5SDimitry Andric Instruction *UI = cast<Instruction>(U);
15450b57cec5SDimitry Andric if (TheLoop->contains(UI))
15460b57cec5SDimitry Andric continue;
1547e8d8bef9SDimitry Andric LLVM_DEBUG(
1548e8d8bef9SDimitry Andric dbgs()
1549e8d8bef9SDimitry Andric << "LV: Cannot fold tail by masking, loop has an outside user for "
1550e8d8bef9SDimitry Andric << *UI << "\n");
15510b57cec5SDimitry Andric return false;
15520b57cec5SDimitry Andric }
15530b57cec5SDimitry Andric }
15540b57cec5SDimitry Andric
15550b57cec5SDimitry Andric // The list of pointers that we can safely read and write to remains empty.
15560b57cec5SDimitry Andric SmallPtrSet<Value *, 8> SafePointers;
15570b57cec5SDimitry Andric
1558*c9157d92SDimitry Andric // Collect masked ops in a temporary set first to avoid partially populating
1559*c9157d92SDimitry Andric // MaskedOp if a block cannot be predicated.
1560e8d8bef9SDimitry Andric SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
1561e8d8bef9SDimitry Andric
15620b57cec5SDimitry Andric // Check and mark all blocks for predication, including those that ordinarily
15630b57cec5SDimitry Andric // do not need predication such as the header block.
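// (Illustrative) with tail folding, every block, including the header,
// executes under a lane mask (e.g. an active.lane.mask comparing the
// widened IV against the trip count), so even otherwise unconditional
// memory accesses become masked operations.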
15640b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
1565*c9157d92SDimitry Andric if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
1566e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
15670b57cec5SDimitry Andric return false;
15680b57cec5SDimitry Andric }
15690b57cec5SDimitry Andric }
15700b57cec5SDimitry Andric
15710b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1572e8d8bef9SDimitry Andric
1573e8d8bef9SDimitry Andric MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
15740b57cec5SDimitry Andric return true;
15750b57cec5SDimitry Andric }
15760b57cec5SDimitry Andric
15770b57cec5SDimitry Andric } // namespace llvm
1578