11db4dbbaSSimon Moll //===----- CodeGen/ExpandVectorPredication.cpp - Expand VP intrinsics -----===// 21db4dbbaSSimon Moll // 31db4dbbaSSimon Moll // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 41db4dbbaSSimon Moll // See https://llvm.org/LICENSE.txt for license information. 51db4dbbaSSimon Moll // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 61db4dbbaSSimon Moll // 71db4dbbaSSimon Moll //===----------------------------------------------------------------------===// 81db4dbbaSSimon Moll // 91db4dbbaSSimon Moll // This pass implements IR expansion for vector predication intrinsics, allowing 101db4dbbaSSimon Moll // targets to enable vector predication until just before codegen. 111db4dbbaSSimon Moll // 121db4dbbaSSimon Moll //===----------------------------------------------------------------------===// 131db4dbbaSSimon Moll 141db4dbbaSSimon Moll #include "llvm/CodeGen/ExpandVectorPredication.h" 151db4dbbaSSimon Moll #include "llvm/ADT/Statistic.h" 161db4dbbaSSimon Moll #include "llvm/Analysis/TargetTransformInfo.h" 171db4dbbaSSimon Moll #include "llvm/Analysis/ValueTracking.h" 181db4dbbaSSimon Moll #include "llvm/CodeGen/Passes.h" 191db4dbbaSSimon Moll #include "llvm/IR/Constants.h" 201db4dbbaSSimon Moll #include "llvm/IR/Function.h" 211db4dbbaSSimon Moll #include "llvm/IR/IRBuilder.h" 221db4dbbaSSimon Moll #include "llvm/IR/InstIterator.h" 231db4dbbaSSimon Moll #include "llvm/IR/Instructions.h" 241db4dbbaSSimon Moll #include "llvm/IR/IntrinsicInst.h" 251db4dbbaSSimon Moll #include "llvm/IR/Intrinsics.h" 261db4dbbaSSimon Moll #include "llvm/InitializePasses.h" 271db4dbbaSSimon Moll #include "llvm/Pass.h" 281db4dbbaSSimon Moll #include "llvm/Support/CommandLine.h" 291db4dbbaSSimon Moll #include "llvm/Support/Compiler.h" 301db4dbbaSSimon Moll #include "llvm/Support/Debug.h" 311db4dbbaSSimon Moll 321db4dbbaSSimon Moll using namespace llvm; 331db4dbbaSSimon Moll 341db4dbbaSSimon Moll using VPLegalization = TargetTransformInfo::VPLegalization; 351db4dbbaSSimon Moll using VPTransform = TargetTransformInfo::VPLegalization::VPTransform; 361db4dbbaSSimon Moll 371db4dbbaSSimon Moll // Keep this in sync with TargetTransformInfo::VPLegalization. 381db4dbbaSSimon Moll #define VPINTERNAL_VPLEGAL_CASES \ 391db4dbbaSSimon Moll VPINTERNAL_CASE(Legal) \ 401db4dbbaSSimon Moll VPINTERNAL_CASE(Discard) \ 411db4dbbaSSimon Moll VPINTERNAL_CASE(Convert) 421db4dbbaSSimon Moll 431db4dbbaSSimon Moll #define VPINTERNAL_CASE(X) "|" #X 441db4dbbaSSimon Moll 451db4dbbaSSimon Moll // Override options. 461db4dbbaSSimon Moll static cl::opt<std::string> EVLTransformOverride( 471db4dbbaSSimon Moll "expandvp-override-evl-transform", cl::init(""), cl::Hidden, 481db4dbbaSSimon Moll cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES 491db4dbbaSSimon Moll ". If non-empty, ignore " 501db4dbbaSSimon Moll "TargetTransformInfo and " 511db4dbbaSSimon Moll "always use this transformation for the %evl parameter (Used in " 521db4dbbaSSimon Moll "testing).")); 531db4dbbaSSimon Moll 541db4dbbaSSimon Moll static cl::opt<std::string> MaskTransformOverride( 551db4dbbaSSimon Moll "expandvp-override-mask-transform", cl::init(""), cl::Hidden, 561db4dbbaSSimon Moll cl::desc("Options: <empty>" VPINTERNAL_VPLEGAL_CASES 571db4dbbaSSimon Moll ". If non-empty, Ignore " 581db4dbbaSSimon Moll "TargetTransformInfo and " 591db4dbbaSSimon Moll "always use this transformation for the %mask parameter (Used in " 601db4dbbaSSimon Moll "testing).")); 611db4dbbaSSimon Moll 621db4dbbaSSimon Moll #undef VPINTERNAL_CASE 631db4dbbaSSimon Moll #define VPINTERNAL_CASE(X) .Case(#X, VPLegalization::X) 641db4dbbaSSimon Moll 651db4dbbaSSimon Moll static VPTransform parseOverrideOption(const std::string &TextOpt) { 661db4dbbaSSimon Moll return StringSwitch<VPTransform>(TextOpt) VPINTERNAL_VPLEGAL_CASES; 671db4dbbaSSimon Moll } 681db4dbbaSSimon Moll 691db4dbbaSSimon Moll #undef VPINTERNAL_VPLEGAL_CASES 701db4dbbaSSimon Moll 711db4dbbaSSimon Moll // Whether any override options are set. 721db4dbbaSSimon Moll static bool anyExpandVPOverridesSet() { 731db4dbbaSSimon Moll return !EVLTransformOverride.empty() || !MaskTransformOverride.empty(); 741db4dbbaSSimon Moll } 751db4dbbaSSimon Moll 761db4dbbaSSimon Moll #define DEBUG_TYPE "expandvp" 771db4dbbaSSimon Moll 781db4dbbaSSimon Moll STATISTIC(NumFoldedVL, "Number of folded vector length params"); 791db4dbbaSSimon Moll STATISTIC(NumLoweredVPOps, "Number of folded vector predication operations"); 801db4dbbaSSimon Moll 811db4dbbaSSimon Moll ///// Helpers { 821db4dbbaSSimon Moll 831db4dbbaSSimon Moll /// \returns Whether the vector mask \p MaskVal has all lane bits set. 841db4dbbaSSimon Moll static bool isAllTrueMask(Value *MaskVal) { 851db4dbbaSSimon Moll auto *ConstVec = dyn_cast<ConstantVector>(MaskVal); 861db4dbbaSSimon Moll return ConstVec && ConstVec->isAllOnesValue(); 871db4dbbaSSimon Moll } 881db4dbbaSSimon Moll 891db4dbbaSSimon Moll /// \returns A non-excepting divisor constant for this type. 901db4dbbaSSimon Moll static Constant *getSafeDivisor(Type *DivTy) { 911db4dbbaSSimon Moll assert(DivTy->isIntOrIntVectorTy() && "Unsupported divisor type"); 921db4dbbaSSimon Moll return ConstantInt::get(DivTy, 1u, false); 931db4dbbaSSimon Moll } 941db4dbbaSSimon Moll 951db4dbbaSSimon Moll /// Transfer operation properties from \p OldVPI to \p NewVal. 961db4dbbaSSimon Moll static void transferDecorations(Value &NewVal, VPIntrinsic &VPI) { 971db4dbbaSSimon Moll auto *NewInst = dyn_cast<Instruction>(&NewVal); 981db4dbbaSSimon Moll if (!NewInst || !isa<FPMathOperator>(NewVal)) 991db4dbbaSSimon Moll return; 1001db4dbbaSSimon Moll 1011db4dbbaSSimon Moll auto *OldFMOp = dyn_cast<FPMathOperator>(&VPI); 1021db4dbbaSSimon Moll if (!OldFMOp) 1031db4dbbaSSimon Moll return; 1041db4dbbaSSimon Moll 1051db4dbbaSSimon Moll NewInst->setFastMathFlags(OldFMOp->getFastMathFlags()); 1061db4dbbaSSimon Moll } 1071db4dbbaSSimon Moll 1081db4dbbaSSimon Moll /// Transfer all properties from \p OldOp to \p NewOp and replace all uses. 1091db4dbbaSSimon Moll /// OldVP gets erased. 1101db4dbbaSSimon Moll static void replaceOperation(Value &NewOp, VPIntrinsic &OldOp) { 1111db4dbbaSSimon Moll transferDecorations(NewOp, OldOp); 1121db4dbbaSSimon Moll OldOp.replaceAllUsesWith(&NewOp); 1131db4dbbaSSimon Moll OldOp.eraseFromParent(); 1141db4dbbaSSimon Moll } 1151db4dbbaSSimon Moll 1166e127110SSimon Moll static bool maySpeculateLanes(VPIntrinsic &VPI) { 1176e127110SSimon Moll // The result of VP reductions depends on the mask and evl. 1186e127110SSimon Moll if (isa<VPReductionIntrinsic>(VPI)) 1196e127110SSimon Moll return false; 1206e127110SSimon Moll // Fallback to whether the intrinsic is speculatable. 12118c1ee04SSimon Moll Optional<unsigned> OpcOpt = VPI.getFunctionalOpcode(); 122*129b531cSKazu Hirata unsigned FunctionalOpc = OpcOpt.value_or((unsigned)Instruction::Call); 12318c1ee04SSimon Moll return isSafeToSpeculativelyExecuteWithOpcode(FunctionalOpc, 12418c1ee04SSimon Moll cast<Operator>(&VPI)); 1256e127110SSimon Moll } 1266e127110SSimon Moll 1271db4dbbaSSimon Moll //// } Helpers 1281db4dbbaSSimon Moll 1291db4dbbaSSimon Moll namespace { 1301db4dbbaSSimon Moll 1311db4dbbaSSimon Moll // Expansion pass state at function scope. 1321db4dbbaSSimon Moll struct CachingVPExpander { 1331db4dbbaSSimon Moll Function &F; 1341db4dbbaSSimon Moll const TargetTransformInfo &TTI; 1351db4dbbaSSimon Moll 1361db4dbbaSSimon Moll /// \returns A (fixed length) vector with ascending integer indices 1371db4dbbaSSimon Moll /// (<0, 1, ..., NumElems-1>). 1381db4dbbaSSimon Moll /// \p Builder 1391db4dbbaSSimon Moll /// Used for instruction creation. 1401db4dbbaSSimon Moll /// \p LaneTy 1411db4dbbaSSimon Moll /// Integer element type of the result vector. 1421db4dbbaSSimon Moll /// \p NumElems 1431db4dbbaSSimon Moll /// Number of vector elements. 1441db4dbbaSSimon Moll Value *createStepVector(IRBuilder<> &Builder, Type *LaneTy, 1451db4dbbaSSimon Moll unsigned NumElems); 1461db4dbbaSSimon Moll 1471db4dbbaSSimon Moll /// \returns A bitmask that is true where the lane position is less-than \p 1481db4dbbaSSimon Moll /// EVLParam 1491db4dbbaSSimon Moll /// 1501db4dbbaSSimon Moll /// \p Builder 1511db4dbbaSSimon Moll /// Used for instruction creation. 1521db4dbbaSSimon Moll /// \p VLParam 1531db4dbbaSSimon Moll /// The explicit vector length parameter to test against the lane 1541db4dbbaSSimon Moll /// positions. 1551db4dbbaSSimon Moll /// \p ElemCount 1561db4dbbaSSimon Moll /// Static (potentially scalable) number of vector elements. 1571db4dbbaSSimon Moll Value *convertEVLToMask(IRBuilder<> &Builder, Value *EVLParam, 1581db4dbbaSSimon Moll ElementCount ElemCount); 1591db4dbbaSSimon Moll 1601db4dbbaSSimon Moll Value *foldEVLIntoMask(VPIntrinsic &VPI); 1611db4dbbaSSimon Moll 1621db4dbbaSSimon Moll /// "Remove" the %evl parameter of \p PI by setting it to the static vector 1631db4dbbaSSimon Moll /// length of the operation. 1641db4dbbaSSimon Moll void discardEVLParameter(VPIntrinsic &PI); 1651db4dbbaSSimon Moll 1661db4dbbaSSimon Moll /// \brief Lower this VP binary operator to a unpredicated binary operator. 1671db4dbbaSSimon Moll Value *expandPredicationInBinaryOperator(IRBuilder<> &Builder, 1681db4dbbaSSimon Moll VPIntrinsic &PI); 1691db4dbbaSSimon Moll 170f3e90472SFraser Cormack /// \brief Lower this VP reduction to a call to an unpredicated reduction 171f3e90472SFraser Cormack /// intrinsic. 172f3e90472SFraser Cormack Value *expandPredicationInReduction(IRBuilder<> &Builder, 173f3e90472SFraser Cormack VPReductionIntrinsic &PI); 174f3e90472SFraser Cormack 1751db4dbbaSSimon Moll /// \brief Query TTI and expand the vector predication in \p P accordingly. 1761db4dbbaSSimon Moll Value *expandPredication(VPIntrinsic &PI); 1771db4dbbaSSimon Moll 1781db4dbbaSSimon Moll /// \brief Determine how and whether the VPIntrinsic \p VPI shall be 1791db4dbbaSSimon Moll /// expanded. This overrides TTI with the cl::opts listed at the top of this 1801db4dbbaSSimon Moll /// file. 1811db4dbbaSSimon Moll VPLegalization getVPLegalizationStrategy(const VPIntrinsic &VPI) const; 1821db4dbbaSSimon Moll bool UsingTTIOverrides; 1831db4dbbaSSimon Moll 1841db4dbbaSSimon Moll public: 1851db4dbbaSSimon Moll CachingVPExpander(Function &F, const TargetTransformInfo &TTI) 1861db4dbbaSSimon Moll : F(F), TTI(TTI), UsingTTIOverrides(anyExpandVPOverridesSet()) {} 1871db4dbbaSSimon Moll 1881db4dbbaSSimon Moll bool expandVectorPredication(); 1891db4dbbaSSimon Moll }; 1901db4dbbaSSimon Moll 1911db4dbbaSSimon Moll //// CachingVPExpander { 1921db4dbbaSSimon Moll 1931db4dbbaSSimon Moll Value *CachingVPExpander::createStepVector(IRBuilder<> &Builder, Type *LaneTy, 1941db4dbbaSSimon Moll unsigned NumElems) { 1951db4dbbaSSimon Moll // TODO add caching 1961db4dbbaSSimon Moll SmallVector<Constant *, 16> ConstElems; 1971db4dbbaSSimon Moll 1981db4dbbaSSimon Moll for (unsigned Idx = 0; Idx < NumElems; ++Idx) 1991db4dbbaSSimon Moll ConstElems.push_back(ConstantInt::get(LaneTy, Idx, false)); 2001db4dbbaSSimon Moll 2011db4dbbaSSimon Moll return ConstantVector::get(ConstElems); 2021db4dbbaSSimon Moll } 2031db4dbbaSSimon Moll 2041db4dbbaSSimon Moll Value *CachingVPExpander::convertEVLToMask(IRBuilder<> &Builder, 2051db4dbbaSSimon Moll Value *EVLParam, 2061db4dbbaSSimon Moll ElementCount ElemCount) { 2071db4dbbaSSimon Moll // TODO add caching 2081db4dbbaSSimon Moll // Scalable vector %evl conversion. 2091db4dbbaSSimon Moll if (ElemCount.isScalable()) { 2101db4dbbaSSimon Moll auto *M = Builder.GetInsertBlock()->getModule(); 2111db4dbbaSSimon Moll Type *BoolVecTy = VectorType::get(Builder.getInt1Ty(), ElemCount); 2121db4dbbaSSimon Moll Function *ActiveMaskFunc = Intrinsic::getDeclaration( 2131db4dbbaSSimon Moll M, Intrinsic::get_active_lane_mask, {BoolVecTy, EVLParam->getType()}); 2141db4dbbaSSimon Moll // `get_active_lane_mask` performs an implicit less-than comparison. 2151db4dbbaSSimon Moll Value *ConstZero = Builder.getInt32(0); 2161db4dbbaSSimon Moll return Builder.CreateCall(ActiveMaskFunc, {ConstZero, EVLParam}); 2171db4dbbaSSimon Moll } 2181db4dbbaSSimon Moll 2191db4dbbaSSimon Moll // Fixed vector %evl conversion. 2201db4dbbaSSimon Moll Type *LaneTy = EVLParam->getType(); 2211db4dbbaSSimon Moll unsigned NumElems = ElemCount.getFixedValue(); 2221db4dbbaSSimon Moll Value *VLSplat = Builder.CreateVectorSplat(NumElems, EVLParam); 2231db4dbbaSSimon Moll Value *IdxVec = createStepVector(Builder, LaneTy, NumElems); 2241db4dbbaSSimon Moll return Builder.CreateICmp(CmpInst::ICMP_ULT, IdxVec, VLSplat); 2251db4dbbaSSimon Moll } 2261db4dbbaSSimon Moll 2271db4dbbaSSimon Moll Value * 2281db4dbbaSSimon Moll CachingVPExpander::expandPredicationInBinaryOperator(IRBuilder<> &Builder, 2291db4dbbaSSimon Moll VPIntrinsic &VPI) { 2306e127110SSimon Moll assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && 2311db4dbbaSSimon Moll "Implicitly dropping %evl in non-speculatable operator!"); 2321db4dbbaSSimon Moll 23366963bf3SSimon Moll auto OC = static_cast<Instruction::BinaryOps>(*VPI.getFunctionalOpcode()); 2341db4dbbaSSimon Moll assert(Instruction::isBinaryOp(OC)); 2351db4dbbaSSimon Moll 2361db4dbbaSSimon Moll Value *Op0 = VPI.getOperand(0); 2371db4dbbaSSimon Moll Value *Op1 = VPI.getOperand(1); 2381db4dbbaSSimon Moll Value *Mask = VPI.getMaskParam(); 2391db4dbbaSSimon Moll 2401db4dbbaSSimon Moll // Blend in safe operands. 2411db4dbbaSSimon Moll if (Mask && !isAllTrueMask(Mask)) { 2421db4dbbaSSimon Moll switch (OC) { 2431db4dbbaSSimon Moll default: 2441db4dbbaSSimon Moll // Can safely ignore the predicate. 2451db4dbbaSSimon Moll break; 2461db4dbbaSSimon Moll 2471db4dbbaSSimon Moll // Division operators need a safe divisor on masked-off lanes (1). 2481db4dbbaSSimon Moll case Instruction::UDiv: 2491db4dbbaSSimon Moll case Instruction::SDiv: 2501db4dbbaSSimon Moll case Instruction::URem: 2511db4dbbaSSimon Moll case Instruction::SRem: 2521db4dbbaSSimon Moll // 2nd operand must not be zero. 2531db4dbbaSSimon Moll Value *SafeDivisor = getSafeDivisor(VPI.getType()); 2541db4dbbaSSimon Moll Op1 = Builder.CreateSelect(Mask, Op1, SafeDivisor); 2551db4dbbaSSimon Moll } 2561db4dbbaSSimon Moll } 2571db4dbbaSSimon Moll 2581db4dbbaSSimon Moll Value *NewBinOp = Builder.CreateBinOp(OC, Op0, Op1, VPI.getName()); 2591db4dbbaSSimon Moll 2601db4dbbaSSimon Moll replaceOperation(*NewBinOp, VPI); 2611db4dbbaSSimon Moll return NewBinOp; 2621db4dbbaSSimon Moll } 2631db4dbbaSSimon Moll 264f3e90472SFraser Cormack static Value *getNeutralReductionElement(const VPReductionIntrinsic &VPI, 265f3e90472SFraser Cormack Type *EltTy) { 266f3e90472SFraser Cormack bool Negative = false; 267f3e90472SFraser Cormack unsigned EltBits = EltTy->getScalarSizeInBits(); 268f3e90472SFraser Cormack switch (VPI.getIntrinsicID()) { 269f3e90472SFraser Cormack default: 270f3e90472SFraser Cormack llvm_unreachable("Expecting a VP reduction intrinsic"); 271f3e90472SFraser Cormack case Intrinsic::vp_reduce_add: 272f3e90472SFraser Cormack case Intrinsic::vp_reduce_or: 273f3e90472SFraser Cormack case Intrinsic::vp_reduce_xor: 274f3e90472SFraser Cormack case Intrinsic::vp_reduce_umax: 275f3e90472SFraser Cormack return Constant::getNullValue(EltTy); 276f3e90472SFraser Cormack case Intrinsic::vp_reduce_mul: 277f3e90472SFraser Cormack return ConstantInt::get(EltTy, 1, /*IsSigned*/ false); 278f3e90472SFraser Cormack case Intrinsic::vp_reduce_and: 279f3e90472SFraser Cormack case Intrinsic::vp_reduce_umin: 280f3e90472SFraser Cormack return ConstantInt::getAllOnesValue(EltTy); 281f3e90472SFraser Cormack case Intrinsic::vp_reduce_smin: 282f3e90472SFraser Cormack return ConstantInt::get(EltTy->getContext(), 283f3e90472SFraser Cormack APInt::getSignedMaxValue(EltBits)); 284f3e90472SFraser Cormack case Intrinsic::vp_reduce_smax: 285f3e90472SFraser Cormack return ConstantInt::get(EltTy->getContext(), 286f3e90472SFraser Cormack APInt::getSignedMinValue(EltBits)); 287f3e90472SFraser Cormack case Intrinsic::vp_reduce_fmax: 288f3e90472SFraser Cormack Negative = true; 289f3e90472SFraser Cormack LLVM_FALLTHROUGH; 290f3e90472SFraser Cormack case Intrinsic::vp_reduce_fmin: { 291f3e90472SFraser Cormack FastMathFlags Flags = VPI.getFastMathFlags(); 292f3e90472SFraser Cormack const fltSemantics &Semantics = EltTy->getFltSemantics(); 293f3e90472SFraser Cormack return !Flags.noNaNs() ? ConstantFP::getQNaN(EltTy, Negative) 294f3e90472SFraser Cormack : !Flags.noInfs() 295f3e90472SFraser Cormack ? ConstantFP::getInfinity(EltTy, Negative) 296f3e90472SFraser Cormack : ConstantFP::get(EltTy, 297f3e90472SFraser Cormack APFloat::getLargest(Semantics, Negative)); 298f3e90472SFraser Cormack } 299f3e90472SFraser Cormack case Intrinsic::vp_reduce_fadd: 300f3e90472SFraser Cormack return ConstantFP::getNegativeZero(EltTy); 301f3e90472SFraser Cormack case Intrinsic::vp_reduce_fmul: 302f3e90472SFraser Cormack return ConstantFP::get(EltTy, 1.0); 303f3e90472SFraser Cormack } 304f3e90472SFraser Cormack } 305f3e90472SFraser Cormack 306f3e90472SFraser Cormack Value * 307f3e90472SFraser Cormack CachingVPExpander::expandPredicationInReduction(IRBuilder<> &Builder, 308f3e90472SFraser Cormack VPReductionIntrinsic &VPI) { 3096e127110SSimon Moll assert((maySpeculateLanes(VPI) || VPI.canIgnoreVectorLengthParam()) && 310f3e90472SFraser Cormack "Implicitly dropping %evl in non-speculatable operator!"); 311f3e90472SFraser Cormack 312f3e90472SFraser Cormack Value *Mask = VPI.getMaskParam(); 313f3e90472SFraser Cormack Value *RedOp = VPI.getOperand(VPI.getVectorParamPos()); 314f3e90472SFraser Cormack 315f3e90472SFraser Cormack // Insert neutral element in masked-out positions 316f3e90472SFraser Cormack if (Mask && !isAllTrueMask(Mask)) { 317f3e90472SFraser Cormack auto *NeutralElt = getNeutralReductionElement(VPI, VPI.getType()); 318f3e90472SFraser Cormack auto *NeutralVector = Builder.CreateVectorSplat( 319f3e90472SFraser Cormack cast<VectorType>(RedOp->getType())->getElementCount(), NeutralElt); 320f3e90472SFraser Cormack RedOp = Builder.CreateSelect(Mask, RedOp, NeutralVector); 321f3e90472SFraser Cormack } 322f3e90472SFraser Cormack 323f3e90472SFraser Cormack Value *Reduction; 324f3e90472SFraser Cormack Value *Start = VPI.getOperand(VPI.getStartParamPos()); 325f3e90472SFraser Cormack 326f3e90472SFraser Cormack switch (VPI.getIntrinsicID()) { 327f3e90472SFraser Cormack default: 328f3e90472SFraser Cormack llvm_unreachable("Impossible reduction kind"); 329f3e90472SFraser Cormack case Intrinsic::vp_reduce_add: 330f3e90472SFraser Cormack Reduction = Builder.CreateAddReduce(RedOp); 331f3e90472SFraser Cormack Reduction = Builder.CreateAdd(Reduction, Start); 332f3e90472SFraser Cormack break; 333f3e90472SFraser Cormack case Intrinsic::vp_reduce_mul: 334f3e90472SFraser Cormack Reduction = Builder.CreateMulReduce(RedOp); 335f3e90472SFraser Cormack Reduction = Builder.CreateMul(Reduction, Start); 336f3e90472SFraser Cormack break; 337f3e90472SFraser Cormack case Intrinsic::vp_reduce_and: 338f3e90472SFraser Cormack Reduction = Builder.CreateAndReduce(RedOp); 339f3e90472SFraser Cormack Reduction = Builder.CreateAnd(Reduction, Start); 340f3e90472SFraser Cormack break; 341f3e90472SFraser Cormack case Intrinsic::vp_reduce_or: 342f3e90472SFraser Cormack Reduction = Builder.CreateOrReduce(RedOp); 343f3e90472SFraser Cormack Reduction = Builder.CreateOr(Reduction, Start); 344f3e90472SFraser Cormack break; 345f3e90472SFraser Cormack case Intrinsic::vp_reduce_xor: 346f3e90472SFraser Cormack Reduction = Builder.CreateXorReduce(RedOp); 347f3e90472SFraser Cormack Reduction = Builder.CreateXor(Reduction, Start); 348f3e90472SFraser Cormack break; 349f3e90472SFraser Cormack case Intrinsic::vp_reduce_smax: 350f3e90472SFraser Cormack Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ true); 351f3e90472SFraser Cormack Reduction = 352f3e90472SFraser Cormack Builder.CreateBinaryIntrinsic(Intrinsic::smax, Reduction, Start); 353f3e90472SFraser Cormack break; 354f3e90472SFraser Cormack case Intrinsic::vp_reduce_smin: 355f3e90472SFraser Cormack Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ true); 356f3e90472SFraser Cormack Reduction = 357f3e90472SFraser Cormack Builder.CreateBinaryIntrinsic(Intrinsic::smin, Reduction, Start); 358f3e90472SFraser Cormack break; 359f3e90472SFraser Cormack case Intrinsic::vp_reduce_umax: 360f3e90472SFraser Cormack Reduction = Builder.CreateIntMaxReduce(RedOp, /*IsSigned*/ false); 361f3e90472SFraser Cormack Reduction = 362f3e90472SFraser Cormack Builder.CreateBinaryIntrinsic(Intrinsic::umax, Reduction, Start); 363f3e90472SFraser Cormack break; 364f3e90472SFraser Cormack case Intrinsic::vp_reduce_umin: 365f3e90472SFraser Cormack Reduction = Builder.CreateIntMinReduce(RedOp, /*IsSigned*/ false); 366f3e90472SFraser Cormack Reduction = 367f3e90472SFraser Cormack Builder.CreateBinaryIntrinsic(Intrinsic::umin, Reduction, Start); 368f3e90472SFraser Cormack break; 369f3e90472SFraser Cormack case Intrinsic::vp_reduce_fmax: 370f3e90472SFraser Cormack Reduction = Builder.CreateFPMaxReduce(RedOp); 371f3e90472SFraser Cormack transferDecorations(*Reduction, VPI); 372f3e90472SFraser Cormack Reduction = 373f3e90472SFraser Cormack Builder.CreateBinaryIntrinsic(Intrinsic::maxnum, Reduction, Start); 374f3e90472SFraser Cormack break; 375f3e90472SFraser Cormack case Intrinsic::vp_reduce_fmin: 376f3e90472SFraser Cormack Reduction = Builder.CreateFPMinReduce(RedOp); 377f3e90472SFraser Cormack transferDecorations(*Reduction, VPI); 378f3e90472SFraser Cormack Reduction = 379f3e90472SFraser Cormack Builder.CreateBinaryIntrinsic(Intrinsic::minnum, Reduction, Start); 380f3e90472SFraser Cormack break; 381f3e90472SFraser Cormack case Intrinsic::vp_reduce_fadd: 382f3e90472SFraser Cormack Reduction = Builder.CreateFAddReduce(Start, RedOp); 383f3e90472SFraser Cormack break; 384f3e90472SFraser Cormack case Intrinsic::vp_reduce_fmul: 385f3e90472SFraser Cormack Reduction = Builder.CreateFMulReduce(Start, RedOp); 386f3e90472SFraser Cormack break; 387f3e90472SFraser Cormack } 388f3e90472SFraser Cormack 389f3e90472SFraser Cormack replaceOperation(*Reduction, VPI); 390f3e90472SFraser Cormack return Reduction; 391f3e90472SFraser Cormack } 392f3e90472SFraser Cormack 3931db4dbbaSSimon Moll void CachingVPExpander::discardEVLParameter(VPIntrinsic &VPI) { 3941db4dbbaSSimon Moll LLVM_DEBUG(dbgs() << "Discard EVL parameter in " << VPI << "\n"); 3951db4dbbaSSimon Moll 3961db4dbbaSSimon Moll if (VPI.canIgnoreVectorLengthParam()) 3971db4dbbaSSimon Moll return; 3981db4dbbaSSimon Moll 3991db4dbbaSSimon Moll Value *EVLParam = VPI.getVectorLengthParam(); 4001db4dbbaSSimon Moll if (!EVLParam) 4011db4dbbaSSimon Moll return; 4021db4dbbaSSimon Moll 4031db4dbbaSSimon Moll ElementCount StaticElemCount = VPI.getStaticVectorLength(); 4041db4dbbaSSimon Moll Value *MaxEVL = nullptr; 4051db4dbbaSSimon Moll Type *Int32Ty = Type::getInt32Ty(VPI.getContext()); 4061db4dbbaSSimon Moll if (StaticElemCount.isScalable()) { 4071db4dbbaSSimon Moll // TODO add caching 4081db4dbbaSSimon Moll auto *M = VPI.getModule(); 4091db4dbbaSSimon Moll Function *VScaleFunc = 4101db4dbbaSSimon Moll Intrinsic::getDeclaration(M, Intrinsic::vscale, Int32Ty); 4111db4dbbaSSimon Moll IRBuilder<> Builder(VPI.getParent(), VPI.getIterator()); 4121db4dbbaSSimon Moll Value *FactorConst = Builder.getInt32(StaticElemCount.getKnownMinValue()); 4131db4dbbaSSimon Moll Value *VScale = Builder.CreateCall(VScaleFunc, {}, "vscale"); 4141db4dbbaSSimon Moll MaxEVL = Builder.CreateMul(VScale, FactorConst, "scalable_size", 4151db4dbbaSSimon Moll /*NUW*/ true, /*NSW*/ false); 4161db4dbbaSSimon Moll } else { 4171db4dbbaSSimon Moll MaxEVL = ConstantInt::get(Int32Ty, StaticElemCount.getFixedValue(), false); 4181db4dbbaSSimon Moll } 4191db4dbbaSSimon Moll VPI.setVectorLengthParam(MaxEVL); 4201db4dbbaSSimon Moll } 4211db4dbbaSSimon Moll 4221db4dbbaSSimon Moll Value *CachingVPExpander::foldEVLIntoMask(VPIntrinsic &VPI) { 4231db4dbbaSSimon Moll LLVM_DEBUG(dbgs() << "Folding vlen for " << VPI << '\n'); 4241db4dbbaSSimon Moll 4251db4dbbaSSimon Moll IRBuilder<> Builder(&VPI); 4261db4dbbaSSimon Moll 4271db4dbbaSSimon Moll // Ineffective %evl parameter and so nothing to do here. 4281db4dbbaSSimon Moll if (VPI.canIgnoreVectorLengthParam()) 4291db4dbbaSSimon Moll return &VPI; 4301db4dbbaSSimon Moll 4311db4dbbaSSimon Moll // Only VP intrinsics can have an %evl parameter. 4321db4dbbaSSimon Moll Value *OldMaskParam = VPI.getMaskParam(); 4331db4dbbaSSimon Moll Value *OldEVLParam = VPI.getVectorLengthParam(); 4341db4dbbaSSimon Moll assert(OldMaskParam && "no mask param to fold the vl param into"); 4351db4dbbaSSimon Moll assert(OldEVLParam && "no EVL param to fold away"); 4361db4dbbaSSimon Moll 4371db4dbbaSSimon Moll LLVM_DEBUG(dbgs() << "OLD evl: " << *OldEVLParam << '\n'); 4381db4dbbaSSimon Moll LLVM_DEBUG(dbgs() << "OLD mask: " << *OldMaskParam << '\n'); 4391db4dbbaSSimon Moll 4401db4dbbaSSimon Moll // Convert the %evl predication into vector mask predication. 4411db4dbbaSSimon Moll ElementCount ElemCount = VPI.getStaticVectorLength(); 4421db4dbbaSSimon Moll Value *VLMask = convertEVLToMask(Builder, OldEVLParam, ElemCount); 4431db4dbbaSSimon Moll Value *NewMaskParam = Builder.CreateAnd(VLMask, OldMaskParam); 4441db4dbbaSSimon Moll VPI.setMaskParam(NewMaskParam); 4451db4dbbaSSimon Moll 4461db4dbbaSSimon Moll // Drop the %evl parameter. 4471db4dbbaSSimon Moll discardEVLParameter(VPI); 4481db4dbbaSSimon Moll assert(VPI.canIgnoreVectorLengthParam() && 4491db4dbbaSSimon Moll "transformation did not render the evl param ineffective!"); 4501db4dbbaSSimon Moll 4511db4dbbaSSimon Moll // Reassess the modified instruction. 4521db4dbbaSSimon Moll return &VPI; 4531db4dbbaSSimon Moll } 4541db4dbbaSSimon Moll 4551db4dbbaSSimon Moll Value *CachingVPExpander::expandPredication(VPIntrinsic &VPI) { 4561db4dbbaSSimon Moll LLVM_DEBUG(dbgs() << "Lowering to unpredicated op: " << VPI << '\n'); 4571db4dbbaSSimon Moll 4581db4dbbaSSimon Moll IRBuilder<> Builder(&VPI); 4591db4dbbaSSimon Moll 4601db4dbbaSSimon Moll // Try lowering to a LLVM instruction first. 46166963bf3SSimon Moll auto OC = VPI.getFunctionalOpcode(); 4621db4dbbaSSimon Moll 46366963bf3SSimon Moll if (OC && Instruction::isBinaryOp(*OC)) 4641db4dbbaSSimon Moll return expandPredicationInBinaryOperator(Builder, VPI); 4651db4dbbaSSimon Moll 466f3e90472SFraser Cormack if (auto *VPRI = dyn_cast<VPReductionIntrinsic>(&VPI)) 467f3e90472SFraser Cormack return expandPredicationInReduction(Builder, *VPRI); 468f3e90472SFraser Cormack 4691db4dbbaSSimon Moll return &VPI; 4701db4dbbaSSimon Moll } 4711db4dbbaSSimon Moll 4721db4dbbaSSimon Moll //// } CachingVPExpander 4731db4dbbaSSimon Moll 4741db4dbbaSSimon Moll struct TransformJob { 4751db4dbbaSSimon Moll VPIntrinsic *PI; 4761db4dbbaSSimon Moll TargetTransformInfo::VPLegalization Strategy; 4771db4dbbaSSimon Moll TransformJob(VPIntrinsic *PI, TargetTransformInfo::VPLegalization InitStrat) 4781db4dbbaSSimon Moll : PI(PI), Strategy(InitStrat) {} 4791db4dbbaSSimon Moll 4801db4dbbaSSimon Moll bool isDone() const { return Strategy.shouldDoNothing(); } 4811db4dbbaSSimon Moll }; 4821db4dbbaSSimon Moll 4836e127110SSimon Moll void sanitizeStrategy(VPIntrinsic &VPI, VPLegalization &LegalizeStrat) { 48418c1ee04SSimon Moll // Operations with speculatable lanes do not strictly need predication. 4856e127110SSimon Moll if (maySpeculateLanes(VPI)) { 4861db4dbbaSSimon Moll // Converting a speculatable VP intrinsic means dropping %mask and %evl. 4871db4dbbaSSimon Moll // No need to expand %evl into the %mask only to ignore that code. 4881db4dbbaSSimon Moll if (LegalizeStrat.OpStrategy == VPLegalization::Convert) 4891db4dbbaSSimon Moll LegalizeStrat.EVLParamStrategy = VPLegalization::Discard; 4901db4dbbaSSimon Moll return; 4911db4dbbaSSimon Moll } 4921db4dbbaSSimon Moll 4931db4dbbaSSimon Moll // We have to preserve the predicating effect of %evl for this 4941db4dbbaSSimon Moll // non-speculatable VP intrinsic. 4951db4dbbaSSimon Moll // 1) Never discard %evl. 4961db4dbbaSSimon Moll // 2) If this VP intrinsic will be expanded to non-VP code, make sure that 4971db4dbbaSSimon Moll // %evl gets folded into %mask. 4981db4dbbaSSimon Moll if ((LegalizeStrat.EVLParamStrategy == VPLegalization::Discard) || 4991db4dbbaSSimon Moll (LegalizeStrat.OpStrategy == VPLegalization::Convert)) { 5001db4dbbaSSimon Moll LegalizeStrat.EVLParamStrategy = VPLegalization::Convert; 5011db4dbbaSSimon Moll } 5021db4dbbaSSimon Moll } 5031db4dbbaSSimon Moll 5041db4dbbaSSimon Moll VPLegalization 5051db4dbbaSSimon Moll CachingVPExpander::getVPLegalizationStrategy(const VPIntrinsic &VPI) const { 5061db4dbbaSSimon Moll auto VPStrat = TTI.getVPLegalizationStrategy(VPI); 5071db4dbbaSSimon Moll if (LLVM_LIKELY(!UsingTTIOverrides)) { 5081db4dbbaSSimon Moll // No overrides - we are in production. 5091db4dbbaSSimon Moll return VPStrat; 5101db4dbbaSSimon Moll } 5111db4dbbaSSimon Moll 5121db4dbbaSSimon Moll // Overrides set - we are in testing, the following does not need to be 5131db4dbbaSSimon Moll // efficient. 5141db4dbbaSSimon Moll VPStrat.EVLParamStrategy = parseOverrideOption(EVLTransformOverride); 5151db4dbbaSSimon Moll VPStrat.OpStrategy = parseOverrideOption(MaskTransformOverride); 5161db4dbbaSSimon Moll return VPStrat; 5171db4dbbaSSimon Moll } 5181db4dbbaSSimon Moll 5191db4dbbaSSimon Moll /// \brief Expand llvm.vp.* intrinsics as requested by \p TTI. 5201db4dbbaSSimon Moll bool CachingVPExpander::expandVectorPredication() { 5211db4dbbaSSimon Moll SmallVector<TransformJob, 16> Worklist; 5221db4dbbaSSimon Moll 5231db4dbbaSSimon Moll // Collect all VPIntrinsics that need expansion and determine their expansion 5241db4dbbaSSimon Moll // strategy. 5251db4dbbaSSimon Moll for (auto &I : instructions(F)) { 5261db4dbbaSSimon Moll auto *VPI = dyn_cast<VPIntrinsic>(&I); 5271db4dbbaSSimon Moll if (!VPI) 5281db4dbbaSSimon Moll continue; 5291db4dbbaSSimon Moll auto VPStrat = getVPLegalizationStrategy(*VPI); 5306e127110SSimon Moll sanitizeStrategy(*VPI, VPStrat); 5311db4dbbaSSimon Moll if (!VPStrat.shouldDoNothing()) 5321db4dbbaSSimon Moll Worklist.emplace_back(VPI, VPStrat); 5331db4dbbaSSimon Moll } 5341db4dbbaSSimon Moll if (Worklist.empty()) 5351db4dbbaSSimon Moll return false; 5361db4dbbaSSimon Moll 5371db4dbbaSSimon Moll // Transform all VPIntrinsics on the worklist. 5381db4dbbaSSimon Moll LLVM_DEBUG(dbgs() << "\n:::: Transforming " << Worklist.size() 5391db4dbbaSSimon Moll << " instructions ::::\n"); 5401db4dbbaSSimon Moll for (TransformJob Job : Worklist) { 5411db4dbbaSSimon Moll // Transform the EVL parameter. 5421db4dbbaSSimon Moll switch (Job.Strategy.EVLParamStrategy) { 5431db4dbbaSSimon Moll case VPLegalization::Legal: 5441db4dbbaSSimon Moll break; 5451db4dbbaSSimon Moll case VPLegalization::Discard: 5461db4dbbaSSimon Moll discardEVLParameter(*Job.PI); 5471db4dbbaSSimon Moll break; 5481db4dbbaSSimon Moll case VPLegalization::Convert: 5491db4dbbaSSimon Moll if (foldEVLIntoMask(*Job.PI)) 5501db4dbbaSSimon Moll ++NumFoldedVL; 5511db4dbbaSSimon Moll break; 5521db4dbbaSSimon Moll } 5531db4dbbaSSimon Moll Job.Strategy.EVLParamStrategy = VPLegalization::Legal; 5541db4dbbaSSimon Moll 5551db4dbbaSSimon Moll // Replace with a non-predicated operation. 5561db4dbbaSSimon Moll switch (Job.Strategy.OpStrategy) { 5571db4dbbaSSimon Moll case VPLegalization::Legal: 5581db4dbbaSSimon Moll break; 5591db4dbbaSSimon Moll case VPLegalization::Discard: 5601db4dbbaSSimon Moll llvm_unreachable("Invalid strategy for operators."); 5611db4dbbaSSimon Moll case VPLegalization::Convert: 5621db4dbbaSSimon Moll expandPredication(*Job.PI); 5631db4dbbaSSimon Moll ++NumLoweredVPOps; 5641db4dbbaSSimon Moll break; 5651db4dbbaSSimon Moll } 5661db4dbbaSSimon Moll Job.Strategy.OpStrategy = VPLegalization::Legal; 5671db4dbbaSSimon Moll 5681db4dbbaSSimon Moll assert(Job.isDone() && "incomplete transformation"); 5691db4dbbaSSimon Moll } 5701db4dbbaSSimon Moll 5711db4dbbaSSimon Moll return true; 5721db4dbbaSSimon Moll } 5731db4dbbaSSimon Moll class ExpandVectorPredication : public FunctionPass { 5741db4dbbaSSimon Moll public: 5751db4dbbaSSimon Moll static char ID; 5761db4dbbaSSimon Moll ExpandVectorPredication() : FunctionPass(ID) { 5771db4dbbaSSimon Moll initializeExpandVectorPredicationPass(*PassRegistry::getPassRegistry()); 5781db4dbbaSSimon Moll } 5791db4dbbaSSimon Moll 5801db4dbbaSSimon Moll bool runOnFunction(Function &F) override { 5811db4dbbaSSimon Moll const auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 5821db4dbbaSSimon Moll CachingVPExpander VPExpander(F, *TTI); 5831db4dbbaSSimon Moll return VPExpander.expandVectorPredication(); 5841db4dbbaSSimon Moll } 5851db4dbbaSSimon Moll 5861db4dbbaSSimon Moll void getAnalysisUsage(AnalysisUsage &AU) const override { 5871db4dbbaSSimon Moll AU.addRequired<TargetTransformInfoWrapperPass>(); 5881db4dbbaSSimon Moll AU.setPreservesCFG(); 5891db4dbbaSSimon Moll } 5901db4dbbaSSimon Moll }; 5911db4dbbaSSimon Moll } // namespace 5921db4dbbaSSimon Moll 5931db4dbbaSSimon Moll char ExpandVectorPredication::ID; 5941db4dbbaSSimon Moll INITIALIZE_PASS_BEGIN(ExpandVectorPredication, "expandvp", 5951db4dbbaSSimon Moll "Expand vector predication intrinsics", false, false) 5961db4dbbaSSimon Moll INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 5971db4dbbaSSimon Moll INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 5981db4dbbaSSimon Moll INITIALIZE_PASS_END(ExpandVectorPredication, "expandvp", 5991db4dbbaSSimon Moll "Expand vector predication intrinsics", false, false) 6001db4dbbaSSimon Moll 6011db4dbbaSSimon Moll FunctionPass *llvm::createExpandVectorPredicationPass() { 6021db4dbbaSSimon Moll return new ExpandVectorPredication(); 6031db4dbbaSSimon Moll } 6041db4dbbaSSimon Moll 6051db4dbbaSSimon Moll PreservedAnalyses 6061db4dbbaSSimon Moll ExpandVectorPredicationPass::run(Function &F, FunctionAnalysisManager &AM) { 6071db4dbbaSSimon Moll const auto &TTI = AM.getResult<TargetIRAnalysis>(F); 6081db4dbbaSSimon Moll CachingVPExpander VPExpander(F, TTI); 6091db4dbbaSSimon Moll if (!VPExpander.expandVectorPredication()) 6101db4dbbaSSimon Moll return PreservedAnalyses::all(); 6111db4dbbaSSimon Moll PreservedAnalyses PA; 6121db4dbbaSSimon Moll PA.preserveSet<CFGAnalyses>(); 6131db4dbbaSSimon Moll return PA; 6141db4dbbaSSimon Moll } 615