11db4dbbaSSimon Moll //===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===// 21db4dbbaSSimon Moll // 31db4dbbaSSimon Moll // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 41db4dbbaSSimon Moll // See https://llvm.org/LICENSE.txt for license information. 51db4dbbaSSimon Moll // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 61db4dbbaSSimon Moll // 71db4dbbaSSimon Moll //===----------------------------------------------------------------------===// 81db4dbbaSSimon Moll // 91db4dbbaSSimon Moll // This pass implements IR expansion for vector predication intrinsics, allowing 101db4dbbaSSimon Moll // targets to enable vector predication until just before codegen. 111db4dbbaSSimon Moll // 121db4dbbaSSimon Moll //===----------------------------------------------------------------------===// 131db4dbbaSSimon Moll 141db4dbbaSSimon Moll #include "llvm/CodeGen/ExpandVectorPredication.h" 151db4dbbaSSimon Moll #include "llvm/ADT/Statistic.h" 161db4dbbaSSimon Moll #include "llvm/Analysis/TargetTransformInfo.h" 171db4dbbaSSimon Moll #include "llvm/Analysis/ValueTracking.h" 181db4dbbaSSimon Moll #include "llvm/CodeGen/Passes.h" 191db4dbbaSSimon Moll #include "llvm/IR/Constants.h" 201db4dbbaSSimon Moll #include "llvm/IR/Function.h" 211db4dbbaSSimon Moll #include "llvm/IR/IRBuilder.h" 221db4dbbaSSimon Moll #include "llvm/IR/InstIterator.h" 231db4dbbaSSimon Moll #include "llvm/IR/Instructions.h" 241db4dbbaSSimon Moll #include "llvm/IR/IntrinsicInst.h" 251db4dbbaSSimon Moll #include "llvm/IR/Intrinsics.h" 261db4dbbaSSimon Moll #include "llvm/IR/Module.h" 271db4dbbaSSimon Moll #include "llvm/InitializePasses.h" 281db4dbbaSSimon Moll #include "llvm/Pass.h" 291db4dbbaSSimon Moll #include "llvm/Support/CommandLine.h" 301db4dbbaSSimon Moll #include "llvm/Support/Compiler.h" 311db4dbbaSSimon Moll #include "llvm/Support/Debug.h" 321db4dbbaSSimon Moll #include "llvm/Support/MathExtras.h" 331db4dbbaSSimon Moll 341db4dbbaSSimon Moll using namespace llvm; 351db4dbbaSSimon Moll 361db4dbbaSSimon Moll using VPLegalization = TargetTransformInfo::VPLegalization; 371db4dbbaSSimon Moll using VPTransform = TargetTransformInfo::VPLegalization::VPTransform; 381db4dbbaSSimon Moll 391db4dbbaSSimon Moll // Keep this in sync with TargetTransformInfo::VPLegalization. 401db4dbbaSSimon Moll #define VPINTERNAL_VPLEGAL_CASES \ 411db4dbbaSSimon Moll VPINTERNAL_CASE(Legal) \ 421db4dbbaSSimon Moll VPINTERNAL_CASE(Discard) \ 431db4dbbaSSimon Moll VPINTERNAL_CASE(Convert) 441db4dbbaSSimon Moll 451db4dbbaSSimon Moll #define VPINTERNAL_CASE(X) "|" #X 461db4dbbaSSimon Moll 471db4dbbaSSimon Moll // Override options. 481db4dbbaSSimon Moll static cl::opt<std::string> EVLTransformOverride( 491db4dbbaSSimon Moll "expandvp-override-evl-transform", cl::init(""), cl::Hidden, 501db4dbbaSSimon Moll cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES 511db4dbbaSSimon Moll ". If non-empty, ignore " 521db4dbbaSSimon Moll "TargetTransformInfo and " 531db4dbbaSSimon Moll "always use this transformation for the %evl parameter (Used in " 541db4dbbaSSimon Moll "testing).")); 551db4dbbaSSimon Moll 561db4dbbaSSimon Moll static cl::opt<std::string> MaskTransformOverride( 571db4dbbaSSimon Moll "expandvp-override-mask-transform", cl::init(""), cl::Hidden, 581db4dbbaSSimon Moll cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES 591db4dbbaSSimon Moll ". If non-empty, Ignore " 601db4dbbaSSimon Moll "TargetTransformInfo and " 611db4dbbaSSimon Moll "always use this transformation for the %mask parameter (Used in " 621db4dbbaSSimon Moll "testing).")); 631db4dbbaSSimon Moll 641db4dbbaSSimon Moll #undef VPINTERNAL_CASE 651db4dbbaSSimon Moll #define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X) 661db4dbbaSSimon Moll 671db4dbbaSSimon Moll static VPTransform parseOverrideOption(const std::string &TextOpt) { 681db4dbbaSSimon Moll return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES; 691db4dbbaSSimon Moll } 701db4dbbaSSimon Moll 711db4dbbaSSimon Moll #undef VPINTERNAL_VPLEGAL_CASES 721db4dbbaSSimon Moll 731db4dbbaSSimon Moll // Whether any override options are set. 741db4dbbaSSimon Moll static bool anyExpandVPOverridesSet() { 751db4dbbaSSimon Moll return !EVLTransformOverride.empty() || !MaskTransformOverride.empty(); 761db4dbbaSSimon Moll } 771db4dbbaSSimon Moll 781db4dbbaSSimon Moll #define DEBUG_TYPE "expandvp" 791db4dbbaSSimon Moll 801db4dbbaSSimon Moll STATISTIC(NumFoldedVL, "Number of folded vector length params"); 811db4dbbaSSimon Moll STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations"); 821db4dbbaSSimon Moll 831db4dbbaSSimon Moll ///// Helpers { 841db4dbbaSSimon Moll 851db4dbbaSSimon Moll /// \returns Whether the vector mask \p MaskVal has all lane bits set. 861db4dbbaSSimon Moll static bool isAllTrueMask(Value *MaskVal) { 871db4dbbaSSimon Moll auto *ConstVec = dyn_cast<ConstantVector>(MaskVal); 881db4dbbaSSimon Moll return ConstVec && ConstVec->isAllOnesValue(); 891db4dbbaSSimon Moll } 901db4dbbaSSimon Moll 911db4dbbaSSimon Moll /// \returns A non-excepting divisor constant for this type. 921db4dbbaSSimon Moll static Constant *getSafeDivisor(Type *DivTy) { 931db4dbbaSSimon Moll assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type"); 941db4dbbaSSimon Moll return ConstantInt::get(DivTy, 1u, false); 951db4dbbaSSimon Moll } 961db4dbbaSSimon Moll 971db4dbbaSSimon Moll /// Transfer operation properties from \p OldVPI to \p NewVal. 981db4dbbaSSimon Moll static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) { 991db4dbbaSSimon Moll auto *NewInst = dyn_cast<Instruction>(&NewVal); 1001db4dbbaSSimon Moll if (!NewInst || !isa<FPMathOperator>(NewVal)) 1011db4dbbaSSimon Moll return; 1021db4dbbaSSimon Moll 1031db4dbbaSSimon Moll auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI); 1041db4dbbaSSimon Moll if (!OldFMOp) 1051db4dbbaSSimon Moll return; 1061db4dbbaSSimon Moll 1071db4dbbaSSimon Moll NewInst->setFastMathFlags(OldFMOp->getFastMathFlags()); 1081db4dbbaSSimon Moll } 1091db4dbbaSSimon Moll 1101db4dbbaSSimon Moll /// Transfer all properties from \p OldOp to \p NewOp and replace all uses. 1111db4dbbaSSimon Moll /// OldVP gets erased. 1121db4dbbaSSimon Moll static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) { 1131db4dbbaSSimon Moll transferDecorations(NewOp, OldOp); 1141db4dbbaSSimon Moll OldOp.replaceAllUsesWith(&NewOp); 1151db4dbbaSSimon Moll OldOp.eraseFromParent(); 1161db4dbbaSSimon Moll } 1171db4dbbaSSimon Moll 1181db4dbbaSSimon Moll //// } Helpers 1191db4dbbaSSimon Moll 1201db4dbbaSSimon Moll namespace { 1211db4dbbaSSimon Moll 1221db4dbbaSSimon Moll // Expansion pass state at function scope. 1231db4dbbaSSimon Moll struct CachingVPExpander { 1241db4dbbaSSimon Moll Function &F; 1251db4dbbaSSimon Moll const TargetTransformInfo &TTI; 1261db4dbbaSSimon Moll 1271db4dbbaSSimon Moll /// \returns A (fixed length) vector with ascending integer indices 1281db4dbbaSSimon Moll /// (<0, 1, ..., NumElems-1>). 1291db4dbbaSSimon Moll /// \p Builder 1301db4dbbaSSimon Moll /// Used for instruction creation. 1311db4dbbaSSimon Moll /// \p LaneTy 1321db4dbbaSSimon Moll /// Integer element type of the result vector. 1331db4dbbaSSimon Moll /// \p NumElems 1341db4dbbaSSimon Moll /// Number of vector elements. 1351db4dbbaSSimon Moll Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy, 1361db4dbbaSSimon Moll unsigned NumElems); 1371db4dbbaSSimon Moll 1381db4dbbaSSimon Moll /// \returns A bitmask that is true where the lane position is less-than \p 1391db4dbbaSSimon Moll /// EVLParam 1401db4dbbaSSimon Moll /// 1411db4dbbaSSimon Moll /// \p Builder 1421db4dbbaSSimon Moll /// Used for instruction creation. 1431db4dbbaSSimon Moll /// \p VLParam 1441db4dbbaSSimon Moll /// The explicit vector length parameter to test against the lane 1451db4dbbaSSimon Moll /// positions. 1461db4dbbaSSimon Moll /// \p ElemCount 1471db4dbbaSSimon Moll /// Static (potentially scalable) number of vector elements. 1481db4dbbaSSimon Moll Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam, 1491db4dbbaSSimon Moll ElementCount ElemCount); 1501db4dbbaSSimon Moll 1511db4dbbaSSimon Moll Value *foldEVLIntoMask(VPIntrinsic &VPI); 1521db4dbbaSSimon Moll 1531db4dbbaSSimon Moll /// "Remove" the %evl parameter of \p PI by setting it to the static vector 1541db4dbbaSSimon Moll /// length of the operation. 1551db4dbbaSSimon Moll void discardEVLParameter(VPIntrinsic &PI); 1561db4dbbaSSimon Moll 1571db4dbbaSSimon Moll /// \brief Lower this VP binary operator to a unpredicated binary operator. 1581db4dbbaSSimon Moll Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder, 1591db4dbbaSSimon Moll VPIntrinsic &PI); 1601db4dbbaSSimon Moll 161*f3e90472SFraser Cormack /// \brief Lower this VP reduction to a call to an unpredicated reduction 162*f3e90472SFraser Cormack /// intrinsic. 163*f3e90472SFraser Cormack Value *expandPredicationInReduction(IRBuilder<> &Builder, 164*f3e90472SFraser Cormack VPReductionIntrinsic &PI); 165*f3e90472SFraser Cormack 1661db4dbbaSSimon Moll /// \brief Query TTI and expand the vector predication in \p P accordingly. 1671db4dbbaSSimon Moll Value *expandPredication(VPIntrinsic &PI); 1681db4dbbaSSimon Moll 1691db4dbbaSSimon Moll /// \brief Determine how and whether the VPIntrinsic \p VPI shall be 1701db4dbbaSSimon Moll /// expanded. This overrides TTI with the cl::opts listed at the top of this 1711db4dbbaSSimon Moll /// file. 1721db4dbbaSSimon Moll VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const; 1731db4dbbaSSimon Moll bool UsingTTIOverrides; 1741db4dbbaSSimon Moll 1751db4dbbaSSimon Moll public: 1761db4dbbaSSimon Moll CachingVPExpander(Function &F, const TargetTransformInfo &TTI) 1771db4dbbaSSimon Moll : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {} 1781db4dbbaSSimon Moll 1791db4dbbaSSimon Moll bool expandVectorPredication(); 1801db4dbbaSSimon Moll }; 1811db4dbbaSSimon Moll 1821db4dbbaSSimon Moll //// CachingVPExpander { 1831db4dbbaSSimon Moll 1841db4dbbaSSimon Moll Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy, 1851db4dbbaSSimon Moll unsigned NumElems) { 1861db4dbbaSSimon Moll // TODO add caching 1871db4dbbaSSimon Moll SmallVector<Constant *, 16> ConstElems; 1881db4dbbaSSimon Moll 1891db4dbbaSSimon Moll for (unsigned Idx = 0; Idx < NumElems; ++Idx) 1901db4dbbaSSimon Moll ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false)); 1911db4dbbaSSimon Moll 1921db4dbbaSSimon Moll return ConstantVector::get(ConstElems); 1931db4dbbaSSimon Moll } 1941db4dbbaSSimon Moll 1951db4dbbaSSimon Moll Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder, 1961db4dbbaSSimon Moll Value *EVLParam, 1971db4dbbaSSimon Moll ElementCount ElemCount) { 1981db4dbbaSSimon Moll // TODO add caching 1991db4dbbaSSimon Moll // Scalable vector %evl conversion. 2001db4dbbaSSimon Moll if (ElemCount.isScalable()) { 2011db4dbbaSSimon Moll auto *M = Builder.GetInsertBlock()->getModule(); 2021db4dbbaSSimon Moll Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount); 2031db4dbbaSSimon Moll Function *ActiveMaskFunc = Intrinsic::getDeclaration( 2041db4dbbaSSimon Moll M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()}); 2051db4dbbaSSimon Moll // `get_active_lane_mask` performs an implicit less-than comparison. 2061db4dbbaSSimon Moll Value *ConstZero = Builder.getInt32(0); 2071db4dbbaSSimon Moll return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam}); 2081db4dbbaSSimon Moll } 2091db4dbbaSSimon Moll 2101db4dbbaSSimon Moll // Fixed vector %evl conversion. 2111db4dbbaSSimon Moll Type *LaneTy = EVLParam->getType(); 2121db4dbbaSSimon Moll unsigned NumElems = ElemCount.getFixedValue(); 2131db4dbbaSSimon Moll Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam); 2141db4dbbaSSimon Moll Value *IdxVec = createStepVector(Builder, LaneTy, NumElems); 2151db4dbbaSSimon Moll return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat); 2161db4dbbaSSimon Moll } 2171db4dbbaSSimon Moll 2181db4dbbaSSimon Moll Value * 2191db4dbbaSSimon Moll CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder, 2201db4dbbaSSimon Moll VPIntrinsic &VPI) { 2211db4dbbaSSimon Moll assert((isSafeToSpeculativelyExecute(&VPI) || 2221db4dbbaSSimon Moll VPI.canIgnoreVectorLengthParam()) && 2231db4dbbaSSimon Moll "Implicitly dropping %evl in non-speculatable operator!"); 2241db4dbbaSSimon Moll 22566963bf3SSimon Moll auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode()); 2261db4dbbaSSimon Moll assert(Instruction::isBinaryOp(OC)); 2271db4dbbaSSimon Moll 2281db4dbbaSSimon Moll Value *Op0 = VPI.getOperand(0); 2291db4dbbaSSimon Moll Value *Op1 = VPI.getOperand(1); 2301db4dbbaSSimon Moll Value *Mask = VPI.getMaskParam(); 2311db4dbbaSSimon Moll 2321db4dbbaSSimon Moll // Blend in safe operands. 2331db4dbbaSSimon Moll if (Mask && !isAllTrueMask(Mask)) { 2341db4dbbaSSimon Moll switch (OC) { 2351db4dbbaSSimon Moll default: 2361db4dbbaSSimon Moll // Can safely ignore the predicate. 2371db4dbbaSSimon Moll break; 2381db4dbbaSSimon Moll 2391db4dbbaSSimon Moll // Division operators need a safe divisor on masked-off lanes (1). 2401db4dbbaSSimon Moll case Instruction::UDiv: 2411db4dbbaSSimon Moll case Instruction::SDiv: 2421db4dbbaSSimon Moll case Instruction::URem: 2431db4dbbaSSimon Moll case Instruction::SRem: 2441db4dbbaSSimon Moll // 2nd operand must not be zero. 2451db4dbbaSSimon Moll Value *SafeDivisor = getSafeDivisor(VPI.getType()); 2461db4dbbaSSimon Moll Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor); 2471db4dbbaSSimon Moll } 2481db4dbbaSSimon Moll } 2491db4dbbaSSimon Moll 2501db4dbbaSSimon Moll Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName()); 2511db4dbbaSSimon Moll 2521db4dbbaSSimon Moll replaceOperation(*NewBinOp, VPI); 2531db4dbbaSSimon Moll return NewBinOp; 2541db4dbbaSSimon Moll } 2551db4dbbaSSimon Moll 256*f3e90472SFraser Cormack static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI, 257*f3e90472SFraser Cormack Type *EltTy) { 258*f3e90472SFraser Cormack bool Negative = false; 259*f3e90472SFraser Cormack unsigned EltBits = EltTy->getScalarSizeInBits(); 260*f3e90472SFraser Cormack switch (VPI.getIntrinsicID()) { 261*f3e90472SFraser Cormack default: 262*f3e90472SFraser Cormack llvm_unreachable("Expecting a VP reduction intrinsic"); 263*f3e90472SFraser Cormack case Intrinsic::vp_reduce_add: 264*f3e90472SFraser Cormack case Intrinsic::vp_reduce_or: 265*f3e90472SFraser Cormack case Intrinsic::vp_reduce_xor: 266*f3e90472SFraser Cormack case Intrinsic::vp_reduce_umax: 267*f3e90472SFraser Cormack return Constant::getNullValue(EltTy); 268*f3e90472SFraser Cormack case Intrinsic::vp_reduce_mul: 269*f3e90472SFraser Cormack return ConstantInt::get(EltTy, 1, /*IsSigned*/ false); 270*f3e90472SFraser Cormack case Intrinsic::vp_reduce_and: 271*f3e90472SFraser Cormack case Intrinsic::vp_reduce_umin: 272*f3e90472SFraser Cormack return ConstantInt::getAllOnesValue(EltTy); 273*f3e90472SFraser Cormack case Intrinsic::vp_reduce_smin: 274*f3e90472SFraser Cormack return ConstantInt::get(EltTy->getContext(), 275*f3e90472SFraser Cormack APInt::getSignedMaxValue(EltBits)); 276*f3e90472SFraser Cormack case Intrinsic::vp_reduce_smax: 277*f3e90472SFraser Cormack return ConstantInt::get(EltTy->getContext(), 278*f3e90472SFraser Cormack APInt::getSignedMinValue(EltBits)); 279*f3e90472SFraser Cormack case Intrinsic::vp_reduce_fmax: 280*f3e90472SFraser Cormack Negative = true; 281*f3e90472SFraser Cormack LLVM_FALLTHROUGH; 282*f3e90472SFraser Cormack case Intrinsic::vp_reduce_fmin: { 283*f3e90472SFraser Cormack FastMathFlags Flags = VPI.getFastMathFlags(); 284*f3e90472SFraser Cormack const fltSemantics &Semantics = EltTy->getFltSemantics(); 285*f3e90472SFraser Cormack return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative) 286*f3e90472SFraser Cormack : !Flags.noInfs() 287*f3e90472SFraser Cormack ? ConstantFP::getInfinity(EltTy, Negative) 288*f3e90472SFraser Cormack : ConstantFP::get(EltTy, 289*f3e90472SFraser Cormack APFloat::getLargest(Semantics, Negative)); 290*f3e90472SFraser Cormack } 291*f3e90472SFraser Cormack case Intrinsic::vp_reduce_fadd: 292*f3e90472SFraser Cormack return ConstantFP::getNegativeZero(EltTy); 293*f3e90472SFraser Cormack case Intrinsic::vp_reduce_fmul: 294*f3e90472SFraser Cormack return ConstantFP::get(EltTy, 1.0); 295*f3e90472SFraser Cormack } 296*f3e90472SFraser Cormack } 297*f3e90472SFraser Cormack 298*f3e90472SFraser Cormack Value * 299*f3e90472SFraser Cormack CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, 300*f3e90472SFraser Cormack VPReductionIntrinsic &VPI) { 301*f3e90472SFraser Cormack assert((isSafeToSpeculativelyExecute(&VPI) || 302*f3e90472SFraser Cormack VPI.canIgnoreVectorLengthParam()) && 303*f3e90472SFraser Cormack "Implicitly dropping %evl in non-speculatable operator!"); 304*f3e90472SFraser Cormack 305*f3e90472SFraser Cormack Value *Mask = VPI.getMaskParam(); 306*f3e90472SFraser Cormack Value *RedOp = VPI.getOperand(VPI.getVectorParamPos()); 307*f3e90472SFraser Cormack 308*f3e90472SFraser Cormack // Insert neutral element in masked-out positions 309*f3e90472SFraser Cormack if (Mask && !isAllTrueMask(Mask)) { 310*f3e90472SFraser Cormack auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType()); 311*f3e90472SFraser Cormack auto *NeutralVector = Builder.CreateVectorSplat( 312*f3e90472SFraser Cormack cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt); 313*f3e90472SFraser Cormack RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector); 314*f3e90472SFraser Cormack } 315*f3e90472SFraser Cormack 316*f3e90472SFraser Cormack Value *Reduction; 317*f3e90472SFraser Cormack Value *Start = VPI.getOperand(VPI.getStartParamPos()); 318*f3e90472SFraser Cormack 319*f3e90472SFraser Cormack switch (VPI.getIntrinsicID()) { 320*f3e90472SFraser Cormack default: 321*f3e90472SFraser Cormack llvm_unreachable("Impossible reduction kind"); 322*f3e90472SFraser Cormack case Intrinsic::vp_reduce_add: 323*f3e90472SFraser Cormack Reduction = Builder.CreateAddReduce(RedOp); 324*f3e90472SFraser Cormack Reduction = Builder.CreateAdd(Reduction, Start); 325*f3e90472SFraser Cormack break; 326*f3e90472SFraser Cormack case Intrinsic::vp_reduce_mul: 327*f3e90472SFraser Cormack Reduction = Builder.CreateMulReduce(RedOp); 328*f3e90472SFraser Cormack Reduction = Builder.CreateMul(Reduction, Start); 329*f3e90472SFraser Cormack break; 330*f3e90472SFraser Cormack case Intrinsic::vp_reduce_and: 331*f3e90472SFraser Cormack Reduction = Builder.CreateAndReduce(RedOp); 332*f3e90472SFraser Cormack Reduction = Builder.CreateAnd(Reduction, Start); 333*f3e90472SFraser Cormack break; 334*f3e90472SFraser Cormack case Intrinsic::vp_reduce_or: 335*f3e90472SFraser Cormack Reduction = Builder.CreateOrReduce(RedOp); 336*f3e90472SFraser Cormack Reduction = Builder.CreateOr(Reduction, Start); 337*f3e90472SFraser Cormack break; 338*f3e90472SFraser Cormack case Intrinsic::vp_reduce_xor: 339*f3e90472SFraser Cormack Reduction = Builder.CreateXorReduce(RedOp); 340*f3e90472SFraser Cormack Reduction = Builder.CreateXor(Reduction, Start); 341*f3e90472SFraser Cormack break; 342*f3e90472SFraser Cormack case Intrinsic::vp_reduce_smax: 343*f3e90472SFraser Cormack Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true); 344*f3e90472SFraser Cormack Reduction = 345*f3e90472SFraser Cormack Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start); 346*f3e90472SFraser Cormack break; 347*f3e90472SFraser Cormack case Intrinsic::vp_reduce_smin: 348*f3e90472SFraser Cormack Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true); 349*f3e90472SFraser Cormack Reduction = 350*f3e90472SFraser Cormack Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start); 351*f3e90472SFraser Cormack break; 352*f3e90472SFraser Cormack case Intrinsic::vp_reduce_umax: 353*f3e90472SFraser Cormack Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false); 354*f3e90472SFraser Cormack Reduction = 355*f3e90472SFraser Cormack Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start); 356*f3e90472SFraser Cormack break; 357*f3e90472SFraser Cormack case Intrinsic::vp_reduce_umin: 358*f3e90472SFraser Cormack Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false); 359*f3e90472SFraser Cormack Reduction = 360*f3e90472SFraser Cormack Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start); 361*f3e90472SFraser Cormack break; 362*f3e90472SFraser Cormack case Intrinsic::vp_reduce_fmax: 363*f3e90472SFraser Cormack Reduction = Builder.CreateFPMaxReduce(RedOp); 364*f3e90472SFraser Cormack transferDecorations(*Reduction, VPI); 365*f3e90472SFraser Cormack Reduction = 366*f3e90472SFraser Cormack Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start); 367*f3e90472SFraser Cormack break; 368*f3e90472SFraser Cormack case Intrinsic::vp_reduce_fmin: 369*f3e90472SFraser Cormack Reduction = Builder.CreateFPMinReduce(RedOp); 370*f3e90472SFraser Cormack transferDecorations(*Reduction, VPI); 371*f3e90472SFraser Cormack Reduction = 372*f3e90472SFraser Cormack Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start); 373*f3e90472SFraser Cormack break; 374*f3e90472SFraser Cormack case Intrinsic::vp_reduce_fadd: 375*f3e90472SFraser Cormack Reduction = Builder.CreateFAddReduce(Start, RedOp); 376*f3e90472SFraser Cormack break; 377*f3e90472SFraser Cormack case Intrinsic::vp_reduce_fmul: 378*f3e90472SFraser Cormack Reduction = Builder.CreateFMulReduce(Start, RedOp); 379*f3e90472SFraser Cormack break; 380*f3e90472SFraser Cormack } 381*f3e90472SFraser Cormack 382*f3e90472SFraser Cormack replaceOperation(*Reduction, VPI); 383*f3e90472SFraser Cormack return Reduction; 384*f3e90472SFraser Cormack } 385*f3e90472SFraser Cormack 3861db4dbbaSSimon Moll void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) { 3871db4dbbaSSimon Moll LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n"); 3881db4dbbaSSimon Moll 3891db4dbbaSSimon Moll if (VPI.canIgnoreVectorLengthParam()) 3901db4dbbaSSimon Moll return; 3911db4dbbaSSimon Moll 3921db4dbbaSSimon Moll Value *EVLParam = VPI.getVectorLengthParam(); 3931db4dbbaSSimon Moll if (!EVLParam) 3941db4dbbaSSimon Moll return; 3951db4dbbaSSimon Moll 3961db4dbbaSSimon Moll ElementCount StaticElemCount = VPI.getStaticVectorLength(); 3971db4dbbaSSimon Moll Value *MaxEVL = nullptr; 3981db4dbbaSSimon Moll Type *Int32Ty = Type::getInt32Ty(VPI.getContext()); 3991db4dbbaSSimon Moll if (StaticElemCount.isScalable()) { 4001db4dbbaSSimon Moll // TODO add caching 4011db4dbbaSSimon Moll auto *M = VPI.getModule(); 4021db4dbbaSSimon Moll Function *VScaleFunc = 4031db4dbbaSSimon Moll Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty); 4041db4dbbaSSimon Moll IRBuilder<> Builder(VPI.getParent(), VPI.getIterator()); 4051db4dbbaSSimon Moll Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue()); 4061db4dbbaSSimon Moll Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale"); 4071db4dbbaSSimon Moll MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size", 4081db4dbbaSSimon Moll /*NUW*/ true, /*NSW*/ false); 4091db4dbbaSSimon Moll } else { 4101db4dbbaSSimon Moll MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false); 4111db4dbbaSSimon Moll } 4121db4dbbaSSimon Moll VPI.setVectorLengthParam(MaxEVL); 4131db4dbbaSSimon Moll } 4141db4dbbaSSimon Moll 4151db4dbbaSSimon Moll Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) { 4161db4dbbaSSimon Moll LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n'); 4171db4dbbaSSimon Moll 4181db4dbbaSSimon Moll IRBuilder<> Builder(&VPI); 4191db4dbbaSSimon Moll 4201db4dbbaSSimon Moll // Ineffective %evl parameter and so nothing to do here. 4211db4dbbaSSimon Moll if (VPI.canIgnoreVectorLengthParam()) 4221db4dbbaSSimon Moll return &VPI; 4231db4dbbaSSimon Moll 4241db4dbbaSSimon Moll // Only VP intrinsics can have an %evl parameter. 4251db4dbbaSSimon Moll Value *OldMaskParam = VPI.getMaskParam(); 4261db4dbbaSSimon Moll Value *OldEVLParam = VPI.getVectorLengthParam(); 4271db4dbbaSSimon Moll assert(OldMaskParam && "no mask param to fold the vl param into"); 4281db4dbbaSSimon Moll assert(OldEVLParam && "no EVL param to fold away"); 4291db4dbbaSSimon Moll 4301db4dbbaSSimon Moll LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n'); 4311db4dbbaSSimon Moll LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n'); 4321db4dbbaSSimon Moll 4331db4dbbaSSimon Moll // Convert the %evl predication into vector mask predication. 4341db4dbbaSSimon Moll ElementCount ElemCount = VPI.getStaticVectorLength(); 4351db4dbbaSSimon Moll Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount); 4361db4dbbaSSimon Moll Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam); 4371db4dbbaSSimon Moll VPI.setMaskParam(NewMaskParam); 4381db4dbbaSSimon Moll 4391db4dbbaSSimon Moll // Drop the %evl parameter. 4401db4dbbaSSimon Moll discardEVLParameter(VPI); 4411db4dbbaSSimon Moll assert(VPI.canIgnoreVectorLengthParam() && 4421db4dbbaSSimon Moll "transformation did not render the evl param ineffective!"); 4431db4dbbaSSimon Moll 4441db4dbbaSSimon Moll // Reassess the modified instruction. 4451db4dbbaSSimon Moll return &VPI; 4461db4dbbaSSimon Moll } 4471db4dbbaSSimon Moll 4481db4dbbaSSimon Moll Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) { 4491db4dbbaSSimon Moll LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n'); 4501db4dbbaSSimon Moll 4511db4dbbaSSimon Moll IRBuilder<> Builder(&VPI); 4521db4dbbaSSimon Moll 4531db4dbbaSSimon Moll // Try lowering to a LLVM instruction first. 45466963bf3SSimon Moll auto OC = VPI.getFunctionalOpcode(); 4551db4dbbaSSimon Moll 45666963bf3SSimon Moll if (OC && Instruction::isBinaryOp(*OC)) 4571db4dbbaSSimon Moll return expandPredicationInBinaryOperator(Builder, VPI); 4581db4dbbaSSimon Moll 459*f3e90472SFraser Cormack if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI)) 460*f3e90472SFraser Cormack return expandPredicationInReduction(Builder, *VPRI); 461*f3e90472SFraser Cormack 4621db4dbbaSSimon Moll return &VPI; 4631db4dbbaSSimon Moll } 4641db4dbbaSSimon Moll 4651db4dbbaSSimon Moll //// } CachingVPExpander 4661db4dbbaSSimon Moll 4671db4dbbaSSimon Moll struct TransformJob { 4681db4dbbaSSimon Moll VPIntrinsic *PI; 4691db4dbbaSSimon Moll TargetTransformInfo::VPLegalization Strategy; 4701db4dbbaSSimon Moll TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat) 4711db4dbbaSSimon Moll : PI(PI), Strategy(InitStrat) {} 4721db4dbbaSSimon Moll 4731db4dbbaSSimon Moll bool isDone() const { return Strategy.shouldDoNothing(); } 4741db4dbbaSSimon Moll }; 4751db4dbbaSSimon Moll 4761db4dbbaSSimon Moll void sanitizeStrategy(Instruction &I, VPLegalization &LegalizeStrat) { 4771db4dbbaSSimon Moll // Speculatable instructions do not strictly need predication. 4781db4dbbaSSimon Moll if (isSafeToSpeculativelyExecute(&I)) { 4791db4dbbaSSimon Moll // Converting a speculatable VP intrinsic means dropping %mask and %evl. 4801db4dbbaSSimon Moll // No need to expand %evl into the %mask only to ignore that code. 4811db4dbbaSSimon Moll if (LegalizeStrat.OpStrategy == VPLegalization::Convert) 4821db4dbbaSSimon Moll LegalizeStrat.EVLParamStrategy = VPLegalization::Discard; 4831db4dbbaSSimon Moll return; 4841db4dbbaSSimon Moll } 4851db4dbbaSSimon Moll 4861db4dbbaSSimon Moll // We have to preserve the predicating effect of %evl for this 4871db4dbbaSSimon Moll // non-speculatable VP intrinsic. 4881db4dbbaSSimon Moll // 1) Never discard %evl. 4891db4dbbaSSimon Moll // 2) If this VP intrinsic will be expanded to non-VP code, make sure that 4901db4dbbaSSimon Moll // %evl gets folded into %mask. 4911db4dbbaSSimon Moll if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) || 4921db4dbbaSSimon Moll (LegalizeStrat.OpStrategy == VPLegalization::Convert)) { 4931db4dbbaSSimon Moll LegalizeStrat.EVLParamStrategy = VPLegalization::Convert; 4941db4dbbaSSimon Moll } 4951db4dbbaSSimon Moll } 4961db4dbbaSSimon Moll 4971db4dbbaSSimon Moll VPLegalization 4981db4dbbaSSimon Moll CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const { 4991db4dbbaSSimon Moll auto VPStrat = TTI.getVPLegalizationStrategy(VPI); 5001db4dbbaSSimon Moll if (LLVM_LIKELY(!UsingTTIOverrides)) { 5011db4dbbaSSimon Moll // No overrides - we are in production. 5021db4dbbaSSimon Moll return VPStrat; 5031db4dbbaSSimon Moll } 5041db4dbbaSSimon Moll 5051db4dbbaSSimon Moll // Overrides set - we are in testing, the following does not need to be 5061db4dbbaSSimon Moll // efficient. 5071db4dbbaSSimon Moll VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride); 5081db4dbbaSSimon Moll VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride); 5091db4dbbaSSimon Moll return VPStrat; 5101db4dbbaSSimon Moll } 5111db4dbbaSSimon Moll 5121db4dbbaSSimon Moll /// \brief Expand llvm.vp.* intrinsics as requested by \p TTI. 5131db4dbbaSSimon Moll bool CachingVPExpander::expandVectorPredication() { 5141db4dbbaSSimon Moll SmallVector<TransformJob, 16> Worklist; 5151db4dbbaSSimon Moll 5161db4dbbaSSimon Moll // Collect all VPIntrinsics that need expansion and determine their expansion 5171db4dbbaSSimon Moll // strategy. 5181db4dbbaSSimon Moll for (auto &I : instructions(F)) { 5191db4dbbaSSimon Moll auto *VPI = dyn_cast<VPIntrinsic>(&I); 5201db4dbbaSSimon Moll if (!VPI) 5211db4dbbaSSimon Moll continue; 5221db4dbbaSSimon Moll auto VPStrat = getVPLegalizationStrategy(*VPI); 5231db4dbbaSSimon Moll sanitizeStrategy(I, VPStrat); 5241db4dbbaSSimon Moll if (!VPStrat.shouldDoNothing()) 5251db4dbbaSSimon Moll Worklist.emplace_back(VPI, VPStrat); 5261db4dbbaSSimon Moll } 5271db4dbbaSSimon Moll if (Worklist.empty()) 5281db4dbbaSSimon Moll return false; 5291db4dbbaSSimon Moll 5301db4dbbaSSimon Moll // Transform all VPIntrinsics on the worklist. 5311db4dbbaSSimon Moll LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size() 5321db4dbbaSSimon Moll << " instructions ::::\n"); 5331db4dbbaSSimon Moll for (TransformJob Job : Worklist) { 5341db4dbbaSSimon Moll // Transform the EVL parameter. 5351db4dbbaSSimon Moll switch (Job.Strategy.EVLParamStrategy) { 5361db4dbbaSSimon Moll case VPLegalization::Legal: 5371db4dbbaSSimon Moll break; 5381db4dbbaSSimon Moll case VPLegalization::Discard: 5391db4dbbaSSimon Moll discardEVLParameter(*Job.PI); 5401db4dbbaSSimon Moll break; 5411db4dbbaSSimon Moll case VPLegalization::Convert: 5421db4dbbaSSimon Moll if (foldEVLIntoMask(*Job.PI)) 5431db4dbbaSSimon Moll ++NumFoldedVL; 5441db4dbbaSSimon Moll break; 5451db4dbbaSSimon Moll } 5461db4dbbaSSimon Moll Job.Strategy.EVLParamStrategy = VPLegalization::Legal; 5471db4dbbaSSimon Moll 5481db4dbbaSSimon Moll // Replace with a non-predicated operation. 5491db4dbbaSSimon Moll switch (Job.Strategy.OpStrategy) { 5501db4dbbaSSimon Moll case VPLegalization::Legal: 5511db4dbbaSSimon Moll break; 5521db4dbbaSSimon Moll case VPLegalization::Discard: 5531db4dbbaSSimon Moll llvm_unreachable("Invalid strategy for operators."); 5541db4dbbaSSimon Moll case VPLegalization::Convert: 5551db4dbbaSSimon Moll expandPredication(*Job.PI); 5561db4dbbaSSimon Moll ++NumLoweredVPOps; 5571db4dbbaSSimon Moll break; 5581db4dbbaSSimon Moll } 5591db4dbbaSSimon Moll Job.Strategy.OpStrategy = VPLegalization::Legal; 5601db4dbbaSSimon Moll 5611db4dbbaSSimon Moll assert(Job.isDone() && "incomplete transformation"); 5621db4dbbaSSimon Moll } 5631db4dbbaSSimon Moll 5641db4dbbaSSimon Moll return true; 5651db4dbbaSSimon Moll } 5661db4dbbaSSimon Moll class ExpandVectorPredication : public FunctionPass { 5671db4dbbaSSimon Moll public: 5681db4dbbaSSimon Moll static char ID; 5691db4dbbaSSimon Moll ExpandVectorPredication() : FunctionPass(ID) { 5701db4dbbaSSimon Moll initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry()); 5711db4dbbaSSimon Moll } 5721db4dbbaSSimon Moll 5731db4dbbaSSimon Moll bool runOnFunction(Function &F) override { 5741db4dbbaSSimon Moll const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 5751db4dbbaSSimon Moll CachingVPExpander VPExpander(F, *TTI); 5761db4dbbaSSimon Moll return VPExpander.expandVectorPredication(); 5771db4dbbaSSimon Moll } 5781db4dbbaSSimon Moll 5791db4dbbaSSimon Moll void getAnalysisUsage(AnalysisUsage &AU) const override { 5801db4dbbaSSimon Moll AU.addRequired<TargetTransformInfoWrapperPass>(); 5811db4dbbaSSimon Moll AU.setPreservesCFG(); 5821db4dbbaSSimon Moll } 5831db4dbbaSSimon Moll }; 5841db4dbbaSSimon Moll } // namespace 5851db4dbbaSSimon Moll 5861db4dbbaSSimon Moll char ExpandVectorPredication::ID; 5871db4dbbaSSimon Moll INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp", 5881db4dbbaSSimon Moll "Expand vector predication intrinsics", false, false) 5891db4dbbaSSimon Moll INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 5901db4dbbaSSimon Moll INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 5911db4dbbaSSimon Moll INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp", 5921db4dbbaSSimon Moll "Expand vector predication intrinsics", false, false) 5931db4dbbaSSimon Moll 5941db4dbbaSSimon Moll FunctionPass *llvm::createExpandVectorPredicationPass() { 5951db4dbbaSSimon Moll return new ExpandVectorPredication(); 5961db4dbbaSSimon Moll } 5971db4dbbaSSimon Moll 5981db4dbbaSSimon Moll PreservedAnalyses 5991db4dbbaSSimon Moll ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) { 6001db4dbbaSSimon Moll const auto &TTI = AM.getResult<TargetIRAnalysis>(F); 6011db4dbbaSSimon Moll CachingVPExpander VPExpander(F, TTI); 6021db4dbbaSSimon Moll if (!VPExpander.expandVectorPredication()) 6031db4dbbaSSimon Moll return PreservedAnalyses::all(); 6041db4dbbaSSimon Moll PreservedAnalyses PA; 6051db4dbbaSSimon Moll PA.preserveSet<CFGAnalyses>(); 6061db4dbbaSSimon Moll return PA; 6071db4dbbaSSimon Moll } 608