1*5f7ddb14SDimitry Andric //===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===//
2*5f7ddb14SDimitry Andric //
3*5f7ddb14SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*5f7ddb14SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*5f7ddb14SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*5f7ddb14SDimitry Andric //
7*5f7ddb14SDimitry Andric //===----------------------------------------------------------------------===//
8*5f7ddb14SDimitry Andric //
9*5f7ddb14SDimitry Andric // This pass implements IR expansion for vector predication intrinsics, allowing
10*5f7ddb14SDimitry Andric // targets to enable vector predication until just before codegen.
11*5f7ddb14SDimitry Andric //
12*5f7ddb14SDimitry Andric //===----------------------------------------------------------------------===//
13*5f7ddb14SDimitry Andric
14*5f7ddb14SDimitry Andric #include "llvm/CodeGen/ExpandVectorPredication.h"
15*5f7ddb14SDimitry Andric #include "llvm/ADT/Statistic.h"
16*5f7ddb14SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
17*5f7ddb14SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
18*5f7ddb14SDimitry Andric #include "llvm/CodeGen/Passes.h"
19*5f7ddb14SDimitry Andric #include "llvm/IR/Constants.h"
20*5f7ddb14SDimitry Andric #include "llvm/IR/Function.h"
21*5f7ddb14SDimitry Andric #include "llvm/IR/IRBuilder.h"
22*5f7ddb14SDimitry Andric #include "llvm/IR/InstIterator.h"
23*5f7ddb14SDimitry Andric #include "llvm/IR/Instructions.h"
24*5f7ddb14SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
25*5f7ddb14SDimitry Andric #include "llvm/IR/Intrinsics.h"
26*5f7ddb14SDimitry Andric #include "llvm/IR/Module.h"
27*5f7ddb14SDimitry Andric #include "llvm/InitializePasses.h"
28*5f7ddb14SDimitry Andric #include "llvm/Pass.h"
29*5f7ddb14SDimitry Andric #include "llvm/Support/CommandLine.h"
30*5f7ddb14SDimitry Andric #include "llvm/Support/Compiler.h"
31*5f7ddb14SDimitry Andric #include "llvm/Support/Debug.h"
32*5f7ddb14SDimitry Andric #include "llvm/Support/MathExtras.h"
33*5f7ddb14SDimitry Andric
34*5f7ddb14SDimitry Andric using namespace llvm;
35*5f7ddb14SDimitry Andric
36*5f7ddb14SDimitry Andric using VPLegalization = TargetTransformInfo::VPLegalization;
37*5f7ddb14SDimitry Andric using VPTransform = TargetTransformInfo::VPLegalization::VPTransform;
38*5f7ddb14SDimitry Andric
39*5f7ddb14SDimitry Andric // Keep this in sync with TargetTransformInfo::VPLegalization.
40*5f7ddb14SDimitry Andric #define VPINTERNAL_VPLEGAL_CASES \
41*5f7ddb14SDimitry Andric VPINTERNAL_CASE(Legal) \
42*5f7ddb14SDimitry Andric VPINTERNAL_CASE(Discard) \
43*5f7ddb14SDimitry Andric VPINTERNAL_CASE(Convert)
44*5f7ddb14SDimitry Andric
45*5f7ddb14SDimitry Andric #define VPINTERNAL_CASE(X) "|" #X
46*5f7ddb14SDimitry Andric
47*5f7ddb14SDimitry Andric // Override options.
48*5f7ddb14SDimitry Andric static cl::opt<std::string> EVLTransformOverride(
49*5f7ddb14SDimitry Andric "expandvp-override-evl-transform", cl::init(""), cl::Hidden,
50*5f7ddb14SDimitry Andric cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
51*5f7ddb14SDimitry Andric ". If non-empty, ignore "
52*5f7ddb14SDimitry Andric "TargetTransformInfo and "
53*5f7ddb14SDimitry Andric "always use this transformation for the %evl parameter (Used in "
54*5f7ddb14SDimitry Andric "testing)."));
55*5f7ddb14SDimitry Andric
56*5f7ddb14SDimitry Andric static cl::opt<std::string> MaskTransformOverride(
57*5f7ddb14SDimitry Andric "expandvp-override-mask-transform", cl::init(""), cl::Hidden,
58*5f7ddb14SDimitry Andric cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES
59*5f7ddb14SDimitry Andric ". If non-empty, Ignore "
60*5f7ddb14SDimitry Andric "TargetTransformInfo and "
61*5f7ddb14SDimitry Andric "always use this transformation for the %mask parameter (Used in "
62*5f7ddb14SDimitry Andric "testing)."));
63*5f7ddb14SDimitry Andric
64*5f7ddb14SDimitry Andric #undef VPINTERNAL_CASE
65*5f7ddb14SDimitry Andric #define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X)
66*5f7ddb14SDimitry Andric
parseOverrideOption(const std::string & TextOpt)67*5f7ddb14SDimitry Andric static VPTransform parseOverrideOption(const std::string &TextOpt) {
68*5f7ddb14SDimitry Andric return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES;
69*5f7ddb14SDimitry Andric }
70*5f7ddb14SDimitry Andric
71*5f7ddb14SDimitry Andric #undef VPINTERNAL_VPLEGAL_CASES
72*5f7ddb14SDimitry Andric
73*5f7ddb14SDimitry Andric // Whether any override options are set.
anyExpandVPOverridesSet()74*5f7ddb14SDimitry Andric static bool anyExpandVPOverridesSet() {
75*5f7ddb14SDimitry Andric return !EVLTransformOverride.empty() || !MaskTransformOverride.empty();
76*5f7ddb14SDimitry Andric }
77*5f7ddb14SDimitry Andric
// Debug/statistics category name for this file.
#define DEBUG_TYPE "expandvp"

// Bumped in expandVectorPredication() when the %evl strategy of a job is
// Convert and foldEVLIntoMask() returns a value.
STATISTIC(NumFoldedVL, "Number of folded vector length params");
// Bumped in expandVectorPredication() when the operation strategy of a job is
// Convert.
// NOTE(review): the description says "folded" although the counter is named
// for lowered operations - confirm whether the wording is intentional.
STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations");
82*5f7ddb14SDimitry Andric
83*5f7ddb14SDimitry Andric ///// Helpers {
84*5f7ddb14SDimitry Andric
85*5f7ddb14SDimitry Andric /// \returns Whether the vector mask \p MaskVal has all lane bits set.
isAllTrueMask(Value * MaskVal)86*5f7ddb14SDimitry Andric static bool isAllTrueMask(Value *MaskVal) {
87*5f7ddb14SDimitry Andric auto *ConstVec = dyn_cast<ConstantVector>(MaskVal);
88*5f7ddb14SDimitry Andric return ConstVec && ConstVec->isAllOnesValue();
89*5f7ddb14SDimitry Andric }
90*5f7ddb14SDimitry Andric
91*5f7ddb14SDimitry Andric /// \returns A non-excepting divisor constant for this type.
getSafeDivisor(Type * DivTy)92*5f7ddb14SDimitry Andric static Constant *getSafeDivisor(Type *DivTy) {
93*5f7ddb14SDimitry Andric assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type");
94*5f7ddb14SDimitry Andric return ConstantInt::get(DivTy, 1u, false);
95*5f7ddb14SDimitry Andric }
96*5f7ddb14SDimitry Andric
97*5f7ddb14SDimitry Andric /// Transfer operation properties from \p OldVPI to \p NewVal.
transferDecorations(Value & NewVal,VPIntrinsic & VPI)98*5f7ddb14SDimitry Andric static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) {
99*5f7ddb14SDimitry Andric auto *NewInst = dyn_cast<Instruction>(&NewVal);
100*5f7ddb14SDimitry Andric if (!NewInst || !isa<FPMathOperator>(NewVal))
101*5f7ddb14SDimitry Andric return;
102*5f7ddb14SDimitry Andric
103*5f7ddb14SDimitry Andric auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI);
104*5f7ddb14SDimitry Andric if (!OldFMOp)
105*5f7ddb14SDimitry Andric return;
106*5f7ddb14SDimitry Andric
107*5f7ddb14SDimitry Andric NewInst->setFastMathFlags(OldFMOp->getFastMathFlags());
108*5f7ddb14SDimitry Andric }
109*5f7ddb14SDimitry Andric
110*5f7ddb14SDimitry Andric /// Transfer all properties from \p OldOp to \p NewOp and replace all uses.
111*5f7ddb14SDimitry Andric /// OldVP gets erased.
replaceOperation(Value & NewOp,VPIntrinsic & OldOp)112*5f7ddb14SDimitry Andric static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) {
113*5f7ddb14SDimitry Andric transferDecorations(NewOp, OldOp);
114*5f7ddb14SDimitry Andric OldOp.replaceAllUsesWith(&NewOp);
115*5f7ddb14SDimitry Andric OldOp.eraseFromParent();
116*5f7ddb14SDimitry Andric }
117*5f7ddb14SDimitry Andric
118*5f7ddb14SDimitry Andric //// } Helpers
119*5f7ddb14SDimitry Andric
120*5f7ddb14SDimitry Andric namespace {
121*5f7ddb14SDimitry Andric
122*5f7ddb14SDimitry Andric // Expansion pass state at function scope.
123*5f7ddb14SDimitry Andric struct CachingVPExpander {
124*5f7ddb14SDimitry Andric Function &F;
125*5f7ddb14SDimitry Andric const TargetTransformInfo &TTI;
126*5f7ddb14SDimitry Andric
127*5f7ddb14SDimitry Andric /// \returns A (fixed length) vector with ascending integer indices
128*5f7ddb14SDimitry Andric /// (<0, 1, ..., NumElems-1>).
129*5f7ddb14SDimitry Andric /// \p Builder
130*5f7ddb14SDimitry Andric /// Used for instruction creation.
131*5f7ddb14SDimitry Andric /// \p LaneTy
132*5f7ddb14SDimitry Andric /// Integer element type of the result vector.
133*5f7ddb14SDimitry Andric /// \p NumElems
134*5f7ddb14SDimitry Andric /// Number of vector elements.
135*5f7ddb14SDimitry Andric Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy,
136*5f7ddb14SDimitry Andric unsigned NumElems);
137*5f7ddb14SDimitry Andric
138*5f7ddb14SDimitry Andric /// \returns A bitmask that is true where the lane position is less-than \p
139*5f7ddb14SDimitry Andric /// EVLParam
140*5f7ddb14SDimitry Andric ///
141*5f7ddb14SDimitry Andric /// \p Builder
142*5f7ddb14SDimitry Andric /// Used for instruction creation.
143*5f7ddb14SDimitry Andric /// \p VLParam
144*5f7ddb14SDimitry Andric /// The explicit vector length parameter to test against the lane
145*5f7ddb14SDimitry Andric /// positions.
146*5f7ddb14SDimitry Andric /// \p ElemCount
147*5f7ddb14SDimitry Andric /// Static (potentially scalable) number of vector elements.
148*5f7ddb14SDimitry Andric Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam,
149*5f7ddb14SDimitry Andric ElementCount ElemCount);
150*5f7ddb14SDimitry Andric
151*5f7ddb14SDimitry Andric Value *foldEVLIntoMask(VPIntrinsic &VPI);
152*5f7ddb14SDimitry Andric
153*5f7ddb14SDimitry Andric /// "Remove" the %evl parameter of \p PI by setting it to the static vector
154*5f7ddb14SDimitry Andric /// length of the operation.
155*5f7ddb14SDimitry Andric void discardEVLParameter(VPIntrinsic &PI);
156*5f7ddb14SDimitry Andric
157*5f7ddb14SDimitry Andric /// \brief Lower this VP binary operator to a unpredicated binary operator.
158*5f7ddb14SDimitry Andric Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder,
159*5f7ddb14SDimitry Andric VPIntrinsic &PI);
160*5f7ddb14SDimitry Andric
161*5f7ddb14SDimitry Andric /// \brief Query TTI and expand the vector predication in \p P accordingly.
162*5f7ddb14SDimitry Andric Value *expandPredication(VPIntrinsic &PI);
163*5f7ddb14SDimitry Andric
164*5f7ddb14SDimitry Andric /// \brief Determine how and whether the VPIntrinsic \p VPI shall be
165*5f7ddb14SDimitry Andric /// expanded. This overrides TTI with the cl::opts listed at the top of this
166*5f7ddb14SDimitry Andric /// file.
167*5f7ddb14SDimitry Andric VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const;
168*5f7ddb14SDimitry Andric bool UsingTTIOverrides;
169*5f7ddb14SDimitry Andric
170*5f7ddb14SDimitry Andric public:
CachingVPExpander__anonb4a9eecd0111::CachingVPExpander171*5f7ddb14SDimitry Andric CachingVPExpander(Function &F, const TargetTransformInfo &TTI)
172*5f7ddb14SDimitry Andric : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {}
173*5f7ddb14SDimitry Andric
174*5f7ddb14SDimitry Andric bool expandVectorPredication();
175*5f7ddb14SDimitry Andric };
176*5f7ddb14SDimitry Andric
177*5f7ddb14SDimitry Andric //// CachingVPExpander {
178*5f7ddb14SDimitry Andric
createStepVector(IRBuilder<> & Builder,Type * LaneTy,unsigned NumElems)179*5f7ddb14SDimitry Andric Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy,
180*5f7ddb14SDimitry Andric unsigned NumElems) {
181*5f7ddb14SDimitry Andric // TODO add caching
182*5f7ddb14SDimitry Andric SmallVector<Constant *, 16> ConstElems;
183*5f7ddb14SDimitry Andric
184*5f7ddb14SDimitry Andric for (unsigned Idx = 0; Idx < NumElems; ++Idx)
185*5f7ddb14SDimitry Andric ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false));
186*5f7ddb14SDimitry Andric
187*5f7ddb14SDimitry Andric return ConstantVector::get(ConstElems);
188*5f7ddb14SDimitry Andric }
189*5f7ddb14SDimitry Andric
convertEVLToMask(IRBuilder<> & Builder,Value * EVLParam,ElementCount ElemCount)190*5f7ddb14SDimitry Andric Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder,
191*5f7ddb14SDimitry Andric Value *EVLParam,
192*5f7ddb14SDimitry Andric ElementCount ElemCount) {
193*5f7ddb14SDimitry Andric // TODO add caching
194*5f7ddb14SDimitry Andric // Scalable vector %evl conversion.
195*5f7ddb14SDimitry Andric if (ElemCount.isScalable()) {
196*5f7ddb14SDimitry Andric auto *M = Builder.GetInsertBlock()->getModule();
197*5f7ddb14SDimitry Andric Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount);
198*5f7ddb14SDimitry Andric Function *ActiveMaskFunc = Intrinsic::getDeclaration(
199*5f7ddb14SDimitry Andric M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()});
200*5f7ddb14SDimitry Andric // `get_active_lane_mask` performs an implicit less-than comparison.
201*5f7ddb14SDimitry Andric Value *ConstZero = Builder.getInt32(0);
202*5f7ddb14SDimitry Andric return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam});
203*5f7ddb14SDimitry Andric }
204*5f7ddb14SDimitry Andric
205*5f7ddb14SDimitry Andric // Fixed vector %evl conversion.
206*5f7ddb14SDimitry Andric Type *LaneTy = EVLParam->getType();
207*5f7ddb14SDimitry Andric unsigned NumElems = ElemCount.getFixedValue();
208*5f7ddb14SDimitry Andric Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam);
209*5f7ddb14SDimitry Andric Value *IdxVec = createStepVector(Builder, LaneTy, NumElems);
210*5f7ddb14SDimitry Andric return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat);
211*5f7ddb14SDimitry Andric }
212*5f7ddb14SDimitry Andric
213*5f7ddb14SDimitry Andric Value *
expandPredicationInBinaryOperator(IRBuilder<> & Builder,VPIntrinsic & VPI)214*5f7ddb14SDimitry Andric CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder,
215*5f7ddb14SDimitry Andric VPIntrinsic &VPI) {
216*5f7ddb14SDimitry Andric assert((isSafeToSpeculativelyExecute(&VPI) ||
217*5f7ddb14SDimitry Andric VPI.canIgnoreVectorLengthParam()) &&
218*5f7ddb14SDimitry Andric "Implicitly dropping %evl in non-speculatable operator!");
219*5f7ddb14SDimitry Andric
220*5f7ddb14SDimitry Andric auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode());
221*5f7ddb14SDimitry Andric assert(Instruction::isBinaryOp(OC));
222*5f7ddb14SDimitry Andric
223*5f7ddb14SDimitry Andric Value *Op0 = VPI.getOperand(0);
224*5f7ddb14SDimitry Andric Value *Op1 = VPI.getOperand(1);
225*5f7ddb14SDimitry Andric Value *Mask = VPI.getMaskParam();
226*5f7ddb14SDimitry Andric
227*5f7ddb14SDimitry Andric // Blend in safe operands.
228*5f7ddb14SDimitry Andric if (Mask && !isAllTrueMask(Mask)) {
229*5f7ddb14SDimitry Andric switch (OC) {
230*5f7ddb14SDimitry Andric default:
231*5f7ddb14SDimitry Andric // Can safely ignore the predicate.
232*5f7ddb14SDimitry Andric break;
233*5f7ddb14SDimitry Andric
234*5f7ddb14SDimitry Andric // Division operators need a safe divisor on masked-off lanes (1).
235*5f7ddb14SDimitry Andric case Instruction::UDiv:
236*5f7ddb14SDimitry Andric case Instruction::SDiv:
237*5f7ddb14SDimitry Andric case Instruction::URem:
238*5f7ddb14SDimitry Andric case Instruction::SRem:
239*5f7ddb14SDimitry Andric // 2nd operand must not be zero.
240*5f7ddb14SDimitry Andric Value *SafeDivisor = getSafeDivisor(VPI.getType());
241*5f7ddb14SDimitry Andric Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor);
242*5f7ddb14SDimitry Andric }
243*5f7ddb14SDimitry Andric }
244*5f7ddb14SDimitry Andric
245*5f7ddb14SDimitry Andric Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName());
246*5f7ddb14SDimitry Andric
247*5f7ddb14SDimitry Andric replaceOperation(*NewBinOp, VPI);
248*5f7ddb14SDimitry Andric return NewBinOp;
249*5f7ddb14SDimitry Andric }
250*5f7ddb14SDimitry Andric
discardEVLParameter(VPIntrinsic & VPI)251*5f7ddb14SDimitry Andric void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) {
252*5f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n");
253*5f7ddb14SDimitry Andric
254*5f7ddb14SDimitry Andric if (VPI.canIgnoreVectorLengthParam())
255*5f7ddb14SDimitry Andric return;
256*5f7ddb14SDimitry Andric
257*5f7ddb14SDimitry Andric Value *EVLParam = VPI.getVectorLengthParam();
258*5f7ddb14SDimitry Andric if (!EVLParam)
259*5f7ddb14SDimitry Andric return;
260*5f7ddb14SDimitry Andric
261*5f7ddb14SDimitry Andric ElementCount StaticElemCount = VPI.getStaticVectorLength();
262*5f7ddb14SDimitry Andric Value *MaxEVL = nullptr;
263*5f7ddb14SDimitry Andric Type *Int32Ty = Type::getInt32Ty(VPI.getContext());
264*5f7ddb14SDimitry Andric if (StaticElemCount.isScalable()) {
265*5f7ddb14SDimitry Andric // TODO add caching
266*5f7ddb14SDimitry Andric auto *M = VPI.getModule();
267*5f7ddb14SDimitry Andric Function *VScaleFunc =
268*5f7ddb14SDimitry Andric Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty);
269*5f7ddb14SDimitry Andric IRBuilder<> Builder(VPI.getParent(), VPI.getIterator());
270*5f7ddb14SDimitry Andric Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue());
271*5f7ddb14SDimitry Andric Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale");
272*5f7ddb14SDimitry Andric MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size",
273*5f7ddb14SDimitry Andric /*NUW*/ true, /*NSW*/ false);
274*5f7ddb14SDimitry Andric } else {
275*5f7ddb14SDimitry Andric MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false);
276*5f7ddb14SDimitry Andric }
277*5f7ddb14SDimitry Andric VPI.setVectorLengthParam(MaxEVL);
278*5f7ddb14SDimitry Andric }
279*5f7ddb14SDimitry Andric
foldEVLIntoMask(VPIntrinsic & VPI)280*5f7ddb14SDimitry Andric Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) {
281*5f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n');
282*5f7ddb14SDimitry Andric
283*5f7ddb14SDimitry Andric IRBuilder<> Builder(&VPI);
284*5f7ddb14SDimitry Andric
285*5f7ddb14SDimitry Andric // Ineffective %evl parameter and so nothing to do here.
286*5f7ddb14SDimitry Andric if (VPI.canIgnoreVectorLengthParam())
287*5f7ddb14SDimitry Andric return &VPI;
288*5f7ddb14SDimitry Andric
289*5f7ddb14SDimitry Andric // Only VP intrinsics can have an %evl parameter.
290*5f7ddb14SDimitry Andric Value *OldMaskParam = VPI.getMaskParam();
291*5f7ddb14SDimitry Andric Value *OldEVLParam = VPI.getVectorLengthParam();
292*5f7ddb14SDimitry Andric assert(OldMaskParam && "no mask param to fold the vl param into");
293*5f7ddb14SDimitry Andric assert(OldEVLParam && "no EVL param to fold away");
294*5f7ddb14SDimitry Andric
295*5f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n');
296*5f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n');
297*5f7ddb14SDimitry Andric
298*5f7ddb14SDimitry Andric // Convert the %evl predication into vector mask predication.
299*5f7ddb14SDimitry Andric ElementCount ElemCount = VPI.getStaticVectorLength();
300*5f7ddb14SDimitry Andric Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount);
301*5f7ddb14SDimitry Andric Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam);
302*5f7ddb14SDimitry Andric VPI.setMaskParam(NewMaskParam);
303*5f7ddb14SDimitry Andric
304*5f7ddb14SDimitry Andric // Drop the %evl parameter.
305*5f7ddb14SDimitry Andric discardEVLParameter(VPI);
306*5f7ddb14SDimitry Andric assert(VPI.canIgnoreVectorLengthParam() &&
307*5f7ddb14SDimitry Andric "transformation did not render the evl param ineffective!");
308*5f7ddb14SDimitry Andric
309*5f7ddb14SDimitry Andric // Reassess the modified instruction.
310*5f7ddb14SDimitry Andric return &VPI;
311*5f7ddb14SDimitry Andric }
312*5f7ddb14SDimitry Andric
expandPredication(VPIntrinsic & VPI)313*5f7ddb14SDimitry Andric Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) {
314*5f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n');
315*5f7ddb14SDimitry Andric
316*5f7ddb14SDimitry Andric IRBuilder<> Builder(&VPI);
317*5f7ddb14SDimitry Andric
318*5f7ddb14SDimitry Andric // Try lowering to a LLVM instruction first.
319*5f7ddb14SDimitry Andric auto OC = VPI.getFunctionalOpcode();
320*5f7ddb14SDimitry Andric
321*5f7ddb14SDimitry Andric if (OC && Instruction::isBinaryOp(*OC))
322*5f7ddb14SDimitry Andric return expandPredicationInBinaryOperator(Builder, VPI);
323*5f7ddb14SDimitry Andric
324*5f7ddb14SDimitry Andric return &VPI;
325*5f7ddb14SDimitry Andric }
326*5f7ddb14SDimitry Andric
327*5f7ddb14SDimitry Andric //// } CachingVPExpander
328*5f7ddb14SDimitry Andric
329*5f7ddb14SDimitry Andric struct TransformJob {
330*5f7ddb14SDimitry Andric VPIntrinsic *PI;
331*5f7ddb14SDimitry Andric TargetTransformInfo::VPLegalization Strategy;
TransformJob__anonb4a9eecd0111::TransformJob332*5f7ddb14SDimitry Andric TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat)
333*5f7ddb14SDimitry Andric : PI(PI), Strategy(InitStrat) {}
334*5f7ddb14SDimitry Andric
isDone__anonb4a9eecd0111::TransformJob335*5f7ddb14SDimitry Andric bool isDone() const { return Strategy.shouldDoNothing(); }
336*5f7ddb14SDimitry Andric };
337*5f7ddb14SDimitry Andric
sanitizeStrategy(Instruction & I,VPLegalization & LegalizeStrat)338*5f7ddb14SDimitry Andric void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) {
339*5f7ddb14SDimitry Andric // Speculatable instructions do not strictly need predication.
340*5f7ddb14SDimitry Andric if (isSafeToSpeculativelyExecute(&I)) {
341*5f7ddb14SDimitry Andric // Converting a speculatable VP intrinsic means dropping %mask and %evl.
342*5f7ddb14SDimitry Andric // No need to expand %evl into the %mask only to ignore that code.
343*5f7ddb14SDimitry Andric if (LegalizeStrat.OpStrategy == VPLegalization::Convert)
344*5f7ddb14SDimitry Andric LegalizeStrat.EVLParamStrategy = VPLegalization::Discard;
345*5f7ddb14SDimitry Andric return;
346*5f7ddb14SDimitry Andric }
347*5f7ddb14SDimitry Andric
348*5f7ddb14SDimitry Andric // We have to preserve the predicating effect of %evl for this
349*5f7ddb14SDimitry Andric // non-speculatable VP intrinsic.
350*5f7ddb14SDimitry Andric // 1) Never discard %evl.
351*5f7ddb14SDimitry Andric // 2) If this VP intrinsic will be expanded to non-VP code, make sure that
352*5f7ddb14SDimitry Andric // %evl gets folded into %mask.
353*5f7ddb14SDimitry Andric if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) ||
354*5f7ddb14SDimitry Andric (LegalizeStrat.OpStrategy == VPLegalization::Convert)) {
355*5f7ddb14SDimitry Andric LegalizeStrat.EVLParamStrategy = VPLegalization::Convert;
356*5f7ddb14SDimitry Andric }
357*5f7ddb14SDimitry Andric }
358*5f7ddb14SDimitry Andric
359*5f7ddb14SDimitry Andric VPLegalization
getVPLegalizationStrategy(const VPIntrinsic & VPI) const360*5f7ddb14SDimitry Andric CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const {
361*5f7ddb14SDimitry Andric auto VPStrat = TTI.getVPLegalizationStrategy(VPI);
362*5f7ddb14SDimitry Andric if (LLVM_LIKELY(!UsingTTIOverrides)) {
363*5f7ddb14SDimitry Andric // No overrides - we are in production.
364*5f7ddb14SDimitry Andric return VPStrat;
365*5f7ddb14SDimitry Andric }
366*5f7ddb14SDimitry Andric
367*5f7ddb14SDimitry Andric // Overrides set - we are in testing, the following does not need to be
368*5f7ddb14SDimitry Andric // efficient.
369*5f7ddb14SDimitry Andric VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride);
370*5f7ddb14SDimitry Andric VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride);
371*5f7ddb14SDimitry Andric return VPStrat;
372*5f7ddb14SDimitry Andric }
373*5f7ddb14SDimitry Andric
374*5f7ddb14SDimitry Andric /// \brief Expand llvm.vp.* intrinsics as requested by \p TTI.
expandVectorPredication()375*5f7ddb14SDimitry Andric bool CachingVPExpander::expandVectorPredication() {
376*5f7ddb14SDimitry Andric SmallVector<TransformJob, 16> Worklist;
377*5f7ddb14SDimitry Andric
378*5f7ddb14SDimitry Andric // Collect all VPIntrinsics that need expansion and determine their expansion
379*5f7ddb14SDimitry Andric // strategy.
380*5f7ddb14SDimitry Andric for (auto &I : instructions(F)) {
381*5f7ddb14SDimitry Andric auto *VPI = dyn_cast<VPIntrinsic>(&I);
382*5f7ddb14SDimitry Andric if (!VPI)
383*5f7ddb14SDimitry Andric continue;
384*5f7ddb14SDimitry Andric auto VPStrat = getVPLegalizationStrategy(*VPI);
385*5f7ddb14SDimitry Andric sanitizeStrategy(I, VPStrat);
386*5f7ddb14SDimitry Andric if (!VPStrat.shouldDoNothing())
387*5f7ddb14SDimitry Andric Worklist.emplace_back(VPI, VPStrat);
388*5f7ddb14SDimitry Andric }
389*5f7ddb14SDimitry Andric if (Worklist.empty())
390*5f7ddb14SDimitry Andric return false;
391*5f7ddb14SDimitry Andric
392*5f7ddb14SDimitry Andric // Transform all VPIntrinsics on the worklist.
393*5f7ddb14SDimitry Andric LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size()
394*5f7ddb14SDimitry Andric << " instructions ::::\n");
395*5f7ddb14SDimitry Andric for (TransformJob Job : Worklist) {
396*5f7ddb14SDimitry Andric // Transform the EVL parameter.
397*5f7ddb14SDimitry Andric switch (Job.Strategy.EVLParamStrategy) {
398*5f7ddb14SDimitry Andric case VPLegalization::Legal:
399*5f7ddb14SDimitry Andric break;
400*5f7ddb14SDimitry Andric case VPLegalization::Discard:
401*5f7ddb14SDimitry Andric discardEVLParameter(*Job.PI);
402*5f7ddb14SDimitry Andric break;
403*5f7ddb14SDimitry Andric case VPLegalization::Convert:
404*5f7ddb14SDimitry Andric if (foldEVLIntoMask(*Job.PI))
405*5f7ddb14SDimitry Andric ++NumFoldedVL;
406*5f7ddb14SDimitry Andric break;
407*5f7ddb14SDimitry Andric }
408*5f7ddb14SDimitry Andric Job.Strategy.EVLParamStrategy = VPLegalization::Legal;
409*5f7ddb14SDimitry Andric
410*5f7ddb14SDimitry Andric // Replace with a non-predicated operation.
411*5f7ddb14SDimitry Andric switch (Job.Strategy.OpStrategy) {
412*5f7ddb14SDimitry Andric case VPLegalization::Legal:
413*5f7ddb14SDimitry Andric break;
414*5f7ddb14SDimitry Andric case VPLegalization::Discard:
415*5f7ddb14SDimitry Andric llvm_unreachable("Invalid strategy for operators.");
416*5f7ddb14SDimitry Andric case VPLegalization::Convert:
417*5f7ddb14SDimitry Andric expandPredication(*Job.PI);
418*5f7ddb14SDimitry Andric ++NumLoweredVPOps;
419*5f7ddb14SDimitry Andric break;
420*5f7ddb14SDimitry Andric }
421*5f7ddb14SDimitry Andric Job.Strategy.OpStrategy = VPLegalization::Legal;
422*5f7ddb14SDimitry Andric
423*5f7ddb14SDimitry Andric assert(Job.isDone() && "incomplete transformation");
424*5f7ddb14SDimitry Andric }
425*5f7ddb14SDimitry Andric
426*5f7ddb14SDimitry Andric return true;
427*5f7ddb14SDimitry Andric }
428*5f7ddb14SDimitry Andric class ExpandVectorPredication : public FunctionPass {
429*5f7ddb14SDimitry Andric public:
430*5f7ddb14SDimitry Andric static char ID;
ExpandVectorPredication()431*5f7ddb14SDimitry Andric ExpandVectorPredication() : FunctionPass(ID) {
432*5f7ddb14SDimitry Andric initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry());
433*5f7ddb14SDimitry Andric }
434*5f7ddb14SDimitry Andric
runOnFunction(Function & F)435*5f7ddb14SDimitry Andric bool runOnFunction(Function &F) override {
436*5f7ddb14SDimitry Andric const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
437*5f7ddb14SDimitry Andric CachingVPExpander VPExpander(F, *TTI);
438*5f7ddb14SDimitry Andric return VPExpander.expandVectorPredication();
439*5f7ddb14SDimitry Andric }
440*5f7ddb14SDimitry Andric
getAnalysisUsage(AnalysisUsage & AU) const441*5f7ddb14SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override {
442*5f7ddb14SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>();
443*5f7ddb14SDimitry Andric AU.setPreservesCFG();
444*5f7ddb14SDimitry Andric }
445*5f7ddb14SDimitry Andric };
446*5f7ddb14SDimitry Andric } // namespace
447*5f7ddb14SDimitry Andric
448*5f7ddb14SDimitry Andric char ExpandVectorPredication::ID;
449*5f7ddb14SDimitry Andric INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp",
450*5f7ddb14SDimitry Andric "Expand vector predication intrinsics", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)451*5f7ddb14SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
452*5f7ddb14SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
453*5f7ddb14SDimitry Andric INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp",
454*5f7ddb14SDimitry Andric "Expand vector predication intrinsics", false, false)
455*5f7ddb14SDimitry Andric
456*5f7ddb14SDimitry Andric FunctionPass *llvm::createExpandVectorPredicationPass() {
457*5f7ddb14SDimitry Andric return new ExpandVectorPredication();
458*5f7ddb14SDimitry Andric }
459*5f7ddb14SDimitry Andric
460*5f7ddb14SDimitry Andric PreservedAnalyses
run(Function & F,FunctionAnalysisManager & AM)461*5f7ddb14SDimitry Andric ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) {
462*5f7ddb14SDimitry Andric const auto &TTI = AM.getResult<TargetIRAnalysis>(F);
463*5f7ddb14SDimitry Andric CachingVPExpander VPExpander(F, TTI);
464*5f7ddb14SDimitry Andric if (!VPExpander.expandVectorPredication())
465*5f7ddb14SDimitry Andric return PreservedAnalyses::all();
466*5f7ddb14SDimitry Andric PreservedAnalyses PA;
467*5f7ddb14SDimitry Andric PA.preserveSet<CFGAnalyses>();
468*5f7ddb14SDimitry Andric return PA;
469*5f7ddb14SDimitry Andric }
470