196c8bc79SSam Elliott //===-- RISCVTargetTransformInfo.cpp - RISC-V specific TTI ----------------===//
296c8bc79SSam Elliott //
396c8bc79SSam Elliott // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
496c8bc79SSam Elliott // See https://llvm.org/LICENSE.txt for license information.
596c8bc79SSam Elliott // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
696c8bc79SSam Elliott //
796c8bc79SSam Elliott //===----------------------------------------------------------------------===//
896c8bc79SSam Elliott 
996c8bc79SSam Elliott #include "RISCVTargetTransformInfo.h"
10387d3c24SCraig Topper #include "MCTargetDesc/RISCVMatInt.h"
1196c8bc79SSam Elliott #include "llvm/Analysis/TargetTransformInfo.h"
1296c8bc79SSam Elliott #include "llvm/CodeGen/BasicTTIImpl.h"
1396c8bc79SSam Elliott #include "llvm/CodeGen/TargetLowering.h"
14ecd7a013SYeting Kuo #include <cmath>
1596c8bc79SSam Elliott using namespace llvm;
1696c8bc79SSam Elliott 
1796c8bc79SSam Elliott #define DEBUG_TYPE "riscvtti"
1896c8bc79SSam Elliott 
// Command-line override for the LMUL assumed when answering
// getRegisterBitWidth queries; this influences the vectorization factor the
// autovectorizers pick. Only whole (non-fractional) LMULs are honored.
static cl::opt<unsigned> RVVRegisterWidthLMUL(
    "riscv-v-register-bit-width-lmul",
    cl::desc(
        "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
        "by autovectorized code. Fractional LMULs are not supported."),
    cl::init(1), cl::Hidden);
25042394b6SCraig Topper 
getIntImmCost(const APInt & Imm,Type * Ty,TTI::TargetCostKind CostKind)26f9a50f04SSander de Smalen InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
2740574fefSSam Parker                                             TTI::TargetCostKind CostKind) {
2896c8bc79SSam Elliott   assert(Ty->isIntegerTy() &&
2996c8bc79SSam Elliott          "getIntImmCost can only estimate cost of materialising integers");
3096c8bc79SSam Elliott 
3196c8bc79SSam Elliott   // We have a Zero register, so 0 is always free.
3296c8bc79SSam Elliott   if (Imm == 0)
3396c8bc79SSam Elliott     return TTI::TCC_Free;
3496c8bc79SSam Elliott 
3596c8bc79SSam Elliott   // Otherwise, we check how many instructions it will take to materialise.
3696c8bc79SSam Elliott   const DataLayout &DL = getDataLayout();
3796c8bc79SSam Elliott   return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty),
384dbb7880SCraig Topper                                     getST()->getFeatureBits());
3996c8bc79SSam Elliott }
4096c8bc79SSam Elliott 
getIntImmCostInst(unsigned Opcode,unsigned Idx,const APInt & Imm,Type * Ty,TTI::TargetCostKind CostKind,Instruction * Inst)41f9a50f04SSander de Smalen InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
42a3d0dce2SMeera Nakrani                                                 const APInt &Imm, Type *Ty,
43a3d0dce2SMeera Nakrani                                                 TTI::TargetCostKind CostKind,
44a3d0dce2SMeera Nakrani                                                 Instruction *Inst) {
4596c8bc79SSam Elliott   assert(Ty->isIntegerTy() &&
4696c8bc79SSam Elliott          "getIntImmCost can only estimate cost of materialising integers");
4796c8bc79SSam Elliott 
4896c8bc79SSam Elliott   // We have a Zero register, so 0 is always free.
4996c8bc79SSam Elliott   if (Imm == 0)
5096c8bc79SSam Elliott     return TTI::TCC_Free;
5196c8bc79SSam Elliott 
5296c8bc79SSam Elliott   // Some instructions in RISC-V can take a 12-bit immediate. Some of these are
5396c8bc79SSam Elliott   // commutative, in others the immediate comes from a specific argument index.
5496c8bc79SSam Elliott   bool Takes12BitImm = false;
5596c8bc79SSam Elliott   unsigned ImmArgIdx = ~0U;
5696c8bc79SSam Elliott 
5796c8bc79SSam Elliott   switch (Opcode) {
5896c8bc79SSam Elliott   case Instruction::GetElementPtr:
5996c8bc79SSam Elliott     // Never hoist any arguments to a GetElementPtr. CodeGenPrepare will
6096c8bc79SSam Elliott     // split up large offsets in GEP into better parts than ConstantHoisting
6196c8bc79SSam Elliott     // can.
6296c8bc79SSam Elliott     return TTI::TCC_Free;
6396c8bc79SSam Elliott   case Instruction::And:
64f7023891SCraig Topper     // zext.h
65f7023891SCraig Topper     if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
66f7023891SCraig Topper       return TTI::TCC_Free;
67f7023891SCraig Topper     // zext.w
68bc0d6565SCraig Topper     if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
69f7023891SCraig Topper       return TTI::TCC_Free;
70f7023891SCraig Topper     LLVM_FALLTHROUGH;
71f7023891SCraig Topper   case Instruction::Add:
7296c8bc79SSam Elliott   case Instruction::Or:
7396c8bc79SSam Elliott   case Instruction::Xor:
7496c8bc79SSam Elliott   case Instruction::Mul:
7596c8bc79SSam Elliott     Takes12BitImm = true;
7696c8bc79SSam Elliott     break;
7796c8bc79SSam Elliott   case Instruction::Sub:
7896c8bc79SSam Elliott   case Instruction::Shl:
7996c8bc79SSam Elliott   case Instruction::LShr:
8096c8bc79SSam Elliott   case Instruction::AShr:
8196c8bc79SSam Elliott     Takes12BitImm = true;
8296c8bc79SSam Elliott     ImmArgIdx = 1;
8396c8bc79SSam Elliott     break;
8496c8bc79SSam Elliott   default:
8596c8bc79SSam Elliott     break;
8696c8bc79SSam Elliott   }
8796c8bc79SSam Elliott 
8896c8bc79SSam Elliott   if (Takes12BitImm) {
8996c8bc79SSam Elliott     // Check immediate is the correct argument...
9096c8bc79SSam Elliott     if (Instruction::isCommutative(Opcode) || Idx == ImmArgIdx) {
9196c8bc79SSam Elliott       // ... and fits into the 12-bit immediate.
92e0831dacSAlex Bradbury       if (Imm.getMinSignedBits() <= 64 &&
93e0831dacSAlex Bradbury           getTLI()->isLegalAddImmediate(Imm.getSExtValue())) {
9496c8bc79SSam Elliott         return TTI::TCC_Free;
9596c8bc79SSam Elliott       }
96e0831dacSAlex Bradbury     }
9796c8bc79SSam Elliott 
9896c8bc79SSam Elliott     // Otherwise, use the full materialisation cost.
9940574fefSSam Parker     return getIntImmCost(Imm, Ty, CostKind);
10096c8bc79SSam Elliott   }
10196c8bc79SSam Elliott 
10296c8bc79SSam Elliott   // By default, prevent hoisting.
10396c8bc79SSam Elliott   return TTI::TCC_Free;
10496c8bc79SSam Elliott }
10596c8bc79SSam Elliott 
106f9a50f04SSander de Smalen InstructionCost
getIntImmCostIntrin(Intrinsic::ID IID,unsigned Idx,const APInt & Imm,Type * Ty,TTI::TargetCostKind CostKind)107f9a50f04SSander de Smalen RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
10840574fefSSam Parker                                   const APInt &Imm, Type *Ty,
10940574fefSSam Parker                                   TTI::TargetCostKind CostKind) {
11096c8bc79SSam Elliott   // Prevent hoisting in unknown cases.
11196c8bc79SSam Elliott   return TTI::TCC_Free;
11296c8bc79SSam Elliott }
113e046c0c2SFraser Cormack 
114f24f09d2SCraig Topper TargetTransformInfo::PopcntSupportKind
getPopcntSupport(unsigned TyWidth)115f24f09d2SCraig Topper RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) {
116f24f09d2SCraig Topper   assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
117f24f09d2SCraig Topper   return ST->hasStdExtZbb() ? TTI::PSK_FastHardware : TTI::PSK_Software;
118f24f09d2SCraig Topper }
119f24f09d2SCraig Topper 
shouldExpandReduction(const IntrinsicInst * II) const120e046c0c2SFraser Cormack bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
121e046c0c2SFraser Cormack   // Currently, the ExpandReductions pass can't expand scalable-vector
122e046c0c2SFraser Cormack   // reductions, but we still request expansion as RVV doesn't support certain
123e046c0c2SFraser Cormack   // reductions and the SelectionDAG can't legalize them either.
124e046c0c2SFraser Cormack   switch (II->getIntrinsicID()) {
125e046c0c2SFraser Cormack   default:
126e046c0c2SFraser Cormack     return false;
127b46aac12SFraser Cormack   // These reductions have no equivalent in RVV
128e046c0c2SFraser Cormack   case Intrinsic::vector_reduce_mul:
129e046c0c2SFraser Cormack   case Intrinsic::vector_reduce_fmul:
130e046c0c2SFraser Cormack     return true;
131e046c0c2SFraser Cormack   }
132e046c0c2SFraser Cormack }
133a5b07a22SHsiangkai Wang 
getMaxVScale() const134a5b07a22SHsiangkai Wang Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
1354710e789SPhilip Reames   if (ST->hasVInstructions())
1364710e789SPhilip Reames     return ST->getRealMaxVLen() / RISCV::RVVBitsPerBlock;
137a5b07a22SHsiangkai Wang   return BaseT::getMaxVScale();
138a5b07a22SHsiangkai Wang }
139512bae81SCraig Topper 
getVScaleForTuning() const1409803b0d1SPhilip Reames Optional<unsigned> RISCVTTIImpl::getVScaleForTuning() const {
1419803b0d1SPhilip Reames   if (ST->hasVInstructions())
1429803b0d1SPhilip Reames     return ST->getRealMinVLen() / RISCV::RVVBitsPerBlock;
1439803b0d1SPhilip Reames   return BaseT::getVScaleForTuning();
1449803b0d1SPhilip Reames }
1459803b0d1SPhilip Reames 
/// Register width reported to the vectorizers for register kind \p K.
TypeSize
RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
  // Clamp the user-requested LMUL into [1, 8] and round down to a power of
  // two; fractional LMULs are not supported.
  unsigned LMUL = PowerOf2Floor(
      std::min<unsigned>(std::max<unsigned>(RVVRegisterWidthLMUL, 1), 8));
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(ST->getXLen());
  case TargetTransformInfo::RGK_FixedWidthVector:
    if (!ST->useRVVForFixedLengthVectors())
      return TypeSize::getFixed(0);
    return TypeSize::getFixed(LMUL * ST->getRealMinVLen());
  case TargetTransformInfo::RGK_ScalableVector:
    if (!ST->hasVInstructions())
      return TypeSize::getScalable(0);
    return TypeSize::getScalable(LMUL * RISCV::RVVBitsPerBlock);
  }

  llvm_unreachable("Unsupported register kind");
}
163042394b6SCraig Topper 
InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index) {
  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

  // A splice lowers to a vslidedown followed by a vslideup.
  // TODO: LMUL should increase cost.
  // TODO: Multiplying by LT.first implies this legalizes into multiple copies
  // of similar code, but I think we expand through memory.
  return LT.first * 2;
}
17309629215SCraig Topper 
InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                             VectorType *Tp, ArrayRef<int> Mask,
                                             int Index, VectorType *SubTp,
                                             ArrayRef<const Value *> Args) {
  if (isa<ScalableVectorType>(Tp)) {
    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    switch (Kind) {
    case TTI::SK_Broadcast:
      return LT.first;
    case TTI::SK_Splice:
      return getSpliceCost(Tp, Index);
    case TTI::SK_Reverse:
      // Most of the cost here is producing the vrgather index register
      // Example sequence:
      //   csrr a0, vlenb
      //   srli a0, a0, 3
      //   addi a0, a0, -1
      //   vsetvli a1, zero, e8, mf8, ta, mu (ignored)
      //   vid.v v9
      //   vrsub.vx v10, v9, a0
      //   vrgather.vv v9, v8, v10
      // Mask vectors additionally require an extend before and a truncate
      // after the vrgather, hence the higher cost.
      return LT.first * (Tp->getElementType()->isIntegerTy(1) ? 9 : 6);
    default:
      // Fallthrough to generic handling.
      // TODO: Most of these cases will return getInvalid in generic code, and
      // must be implemented here.
      break;
    }
  }

  return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}
21009629215SCraig Topper 
21189f15fc6SAlex Tsao InstructionCost
getMaskedMemoryOpCost(unsigned Opcode,Type * Src,Align Alignment,unsigned AddressSpace,TTI::TargetCostKind CostKind)21289f15fc6SAlex Tsao RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
21389f15fc6SAlex Tsao                                     unsigned AddressSpace,
21489f15fc6SAlex Tsao                                     TTI::TargetCostKind CostKind) {
21589f15fc6SAlex Tsao   if (!isa<ScalableVectorType>(Src))
21689f15fc6SAlex Tsao     return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
21789f15fc6SAlex Tsao                                         CostKind);
21889f15fc6SAlex Tsao 
21989f15fc6SAlex Tsao   return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
22089f15fc6SAlex Tsao }
22189f15fc6SAlex Tsao 
getGatherScatterOpCost(unsigned Opcode,Type * DataTy,const Value * Ptr,bool VariableMask,Align Alignment,TTI::TargetCostKind CostKind,const Instruction * I)222fd1f8a54SSander de Smalen InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
223512bae81SCraig Topper     unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
224512bae81SCraig Topper     Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
225512bae81SCraig Topper   if (CostKind != TTI::TCK_RecipThroughput)
226512bae81SCraig Topper     return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
227512bae81SCraig Topper                                          Alignment, CostKind, I);
228512bae81SCraig Topper 
229512bae81SCraig Topper   if ((Opcode == Instruction::Load &&
230512bae81SCraig Topper        !isLegalMaskedGather(DataTy, Align(Alignment))) ||
231512bae81SCraig Topper       (Opcode == Instruction::Store &&
232512bae81SCraig Topper        !isLegalMaskedScatter(DataTy, Align(Alignment))))
233512bae81SCraig Topper     return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
234512bae81SCraig Topper                                          Alignment, CostKind, I);
235512bae81SCraig Topper 
236d764aa7fSPhilip Reames   // Cost is proportional to the number of memory operations implied.  For
237d764aa7fSPhilip Reames   // scalable vectors, we use an upper bound on that number since we don't
238d764aa7fSPhilip Reames   // know exactly what VL will be.
239d764aa7fSPhilip Reames   auto &VTy = *cast<VectorType>(DataTy);
240d764aa7fSPhilip Reames   InstructionCost MemOpCost = getMemoryOpCost(Opcode, VTy.getElementType(),
241d764aa7fSPhilip Reames                                               Alignment, 0, CostKind, I);
242aadc9d26SPhilip Reames   unsigned NumLoads = getMaxVLFor(&VTy);
243512bae81SCraig Topper   return NumLoads * MemOpCost;
244512bae81SCraig Topper }
245f95ee607SMichael Berg 
246505fce5aSLiaoChunyu InstructionCost
getIntrinsicInstrCost(const IntrinsicCostAttributes & ICA,TTI::TargetCostKind CostKind)247505fce5aSLiaoChunyu RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
248505fce5aSLiaoChunyu                                     TTI::TargetCostKind CostKind) {
249505fce5aSLiaoChunyu   auto *RetTy = ICA.getReturnType();
250505fce5aSLiaoChunyu   switch (ICA.getID()) {
251505fce5aSLiaoChunyu   // TODO: add more intrinsic
252505fce5aSLiaoChunyu   case Intrinsic::experimental_stepvector: {
253505fce5aSLiaoChunyu     unsigned Cost = 1; // vid
254505fce5aSLiaoChunyu     auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
255505fce5aSLiaoChunyu     return Cost + (LT.first - 1);
256505fce5aSLiaoChunyu   }
257505fce5aSLiaoChunyu   default:
258505fce5aSLiaoChunyu     break;
259505fce5aSLiaoChunyu   }
260505fce5aSLiaoChunyu   return BaseT::getIntrinsicInstrCost(ICA, CostKind);
261505fce5aSLiaoChunyu }
262505fce5aSLiaoChunyu 
getCastInstrCost(unsigned Opcode,Type * Dst,Type * Src,TTI::CastContextHint CCH,TTI::TargetCostKind CostKind,const Instruction * I)263ecd7a013SYeting Kuo InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
264ecd7a013SYeting Kuo                                                Type *Src,
265ecd7a013SYeting Kuo                                                TTI::CastContextHint CCH,
266ecd7a013SYeting Kuo                                                TTI::TargetCostKind CostKind,
267ecd7a013SYeting Kuo                                                const Instruction *I) {
268ecd7a013SYeting Kuo   if (isa<VectorType>(Dst) && isa<VectorType>(Src)) {
269ecd7a013SYeting Kuo     // FIXME: Need to compute legalizing cost for illegal types.
270ecd7a013SYeting Kuo     if (!isTypeLegal(Src) || !isTypeLegal(Dst))
271ecd7a013SYeting Kuo       return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
272ecd7a013SYeting Kuo 
273ecd7a013SYeting Kuo     // Skip if element size of Dst or Src is bigger than ELEN.
27476192182SCraig Topper     if (Src->getScalarSizeInBits() > ST->getELEN() ||
27576192182SCraig Topper         Dst->getScalarSizeInBits() > ST->getELEN())
276ecd7a013SYeting Kuo       return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
277ecd7a013SYeting Kuo 
278ecd7a013SYeting Kuo     int ISD = TLI->InstructionOpcodeToISD(Opcode);
279ecd7a013SYeting Kuo     assert(ISD && "Invalid opcode");
280ecd7a013SYeting Kuo 
281ecd7a013SYeting Kuo     // FIXME: Need to consider vsetvli and lmul.
282ecd7a013SYeting Kuo     int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
283ecd7a013SYeting Kuo                   (int)Log2_32(Src->getScalarSizeInBits());
284ecd7a013SYeting Kuo     switch (ISD) {
285ecd7a013SYeting Kuo     case ISD::SIGN_EXTEND:
286ecd7a013SYeting Kuo     case ISD::ZERO_EXTEND:
287ecd7a013SYeting Kuo       return 1;
288ecd7a013SYeting Kuo     case ISD::TRUNCATE:
289ecd7a013SYeting Kuo     case ISD::FP_EXTEND:
290ecd7a013SYeting Kuo     case ISD::FP_ROUND:
291ecd7a013SYeting Kuo       // Counts of narrow/widen instructions.
292ecd7a013SYeting Kuo       return std::abs(PowDiff);
293ecd7a013SYeting Kuo     case ISD::FP_TO_SINT:
294ecd7a013SYeting Kuo     case ISD::FP_TO_UINT:
295ecd7a013SYeting Kuo     case ISD::SINT_TO_FP:
296ecd7a013SYeting Kuo     case ISD::UINT_TO_FP:
297ecd7a013SYeting Kuo       if (std::abs(PowDiff) <= 1)
298ecd7a013SYeting Kuo         return 1;
299ecd7a013SYeting Kuo       // Backend could lower (v[sz]ext i8 to double) to vfcvt(v[sz]ext.f8 i8),
300ecd7a013SYeting Kuo       // so it only need two conversion.
301ecd7a013SYeting Kuo       if (Src->isIntOrIntVectorTy())
302ecd7a013SYeting Kuo         return 2;
303ecd7a013SYeting Kuo       // Counts of narrow/widen instructions.
304ecd7a013SYeting Kuo       return std::abs(PowDiff);
305ecd7a013SYeting Kuo     }
306ecd7a013SYeting Kuo   }
307ecd7a013SYeting Kuo   return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
308ecd7a013SYeting Kuo }
309ecd7a013SYeting Kuo 
getMaxVLFor(VectorType * Ty)310aadc9d26SPhilip Reames unsigned RISCVTTIImpl::getMaxVLFor(VectorType *Ty) {
311aadc9d26SPhilip Reames   if (isa<ScalableVectorType>(Ty)) {
312aadc9d26SPhilip Reames     const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
313aadc9d26SPhilip Reames     const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
314aadc9d26SPhilip Reames     const unsigned VectorBitsMax = ST->getRealMaxVLen();
315aadc9d26SPhilip Reames     return RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
316aadc9d26SPhilip Reames   }
317aadc9d26SPhilip Reames   return cast<FixedVectorType>(Ty)->getNumElements();
318aadc9d26SPhilip Reames }
319aadc9d26SPhilip Reames 
InstructionCost
RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                     bool IsUnsigned,
                                     TTI::TargetCostKind CostKind) {
  // Fall back to the generic model when RVV can't be used for this type:
  // fixed vectors without RVV support, or elements wider than ELEN.
  if ((isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors()) ||
      Ty->getScalarSizeInBits() > ST->getELEN())
    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);

  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  if (Ty->getElementType()->isIntegerTy(1))
    // vcpop sequences, see vreduction-mask.ll.  umax, smin actually only
    // cost 2, but we don't have enough info here so we slightly over cost.
    return (LT.first - 1) + 3;

  // IR Reduction is composed by two vmv and one rvv reduction instruction;
  // the reduction tree depth grows with log2(VL).
  InstructionCost BaseCost = 2;
  return (LT.first - 1) + BaseCost + Log2_32_Ceil(getMaxVLFor(Ty));
}
342ae7c6647SYeting Kuo 
InstructionCost
RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                         Optional<FastMathFlags> FMF,
                                         TTI::TargetCostKind CostKind) {
  // Fall back to the generic model when RVV can't be used for this type:
  // fixed vectors without RVV support, or elements wider than ELEN.
  if ((isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors()) ||
      Ty->getScalarSizeInBits() > ST->getELEN())
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Only these reductions have a direct RVV lowering costed here.
  if (ISD != ISD::ADD && ISD != ISD::OR && ISD != ISD::XOR && ISD != ISD::AND &&
      ISD != ISD::FADD)
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  if (Ty->getElementType()->isIntegerTy(1))
    // vcpop sequences, see vreduction-mask.ll
    return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);

  // IR Reduction is composed by two vmv and one rvv reduction instruction.
  // Ordered FP reductions are serialized over all VL elements; unordered
  // ones form a log2(VL)-deep tree.
  InstructionCost BaseCost = 2;
  unsigned VL = getMaxVLFor(Ty);
  if (TTI::requiresOrderedReduction(FMF))
    return (LT.first - 1) + BaseCost + VL;
  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
}
373ae7c6647SYeting Kuo 
getUnrollingPreferences(Loop * L,ScalarEvolution & SE,TTI::UnrollingPreferences & UP,OptimizationRemarkEmitter * ORE)374f95ee607SMichael Berg void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
375f95ee607SMichael Berg                                            TTI::UnrollingPreferences &UP,
376f95ee607SMichael Berg                                            OptimizationRemarkEmitter *ORE) {
377f95ee607SMichael Berg   // TODO: More tuning on benchmarks and metrics with changes as needed
378f95ee607SMichael Berg   //       would apply to all settings below to enable performance.
379f95ee607SMichael Berg 
380f95ee607SMichael Berg 
381861489afSZi Xuan Wu (Zeson)   if (ST->enableDefaultUnroll())
382f95ee607SMichael Berg     return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);
383f95ee607SMichael Berg 
384f95ee607SMichael Berg   // Enable Upper bound unrolling universally, not dependant upon the conditions
385f95ee607SMichael Berg   // below.
386f95ee607SMichael Berg   UP.UpperBound = true;
387f95ee607SMichael Berg 
388f95ee607SMichael Berg   // Disable loop unrolling for Oz and Os.
389f95ee607SMichael Berg   UP.OptSizeThreshold = 0;
390f95ee607SMichael Berg   UP.PartialOptSizeThreshold = 0;
391f95ee607SMichael Berg   if (L->getHeader()->getParent()->hasOptSize())
392f95ee607SMichael Berg     return;
393f95ee607SMichael Berg 
394f95ee607SMichael Berg   SmallVector<BasicBlock *, 4> ExitingBlocks;
395f95ee607SMichael Berg   L->getExitingBlocks(ExitingBlocks);
396f95ee607SMichael Berg   LLVM_DEBUG(dbgs() << "Loop has:\n"
397f95ee607SMichael Berg                     << "Blocks: " << L->getNumBlocks() << "\n"
398f95ee607SMichael Berg                     << "Exit blocks: " << ExitingBlocks.size() << "\n");
399f95ee607SMichael Berg 
400f95ee607SMichael Berg   // Only allow another exit other than the latch. This acts as an early exit
401f95ee607SMichael Berg   // as it mirrors the profitability calculation of the runtime unroller.
402f95ee607SMichael Berg   if (ExitingBlocks.size() > 2)
403f95ee607SMichael Berg     return;
404f95ee607SMichael Berg 
405f95ee607SMichael Berg   // Limit the CFG of the loop body for targets with a branch predictor.
406f95ee607SMichael Berg   // Allowing 4 blocks permits if-then-else diamonds in the body.
407f95ee607SMichael Berg   if (L->getNumBlocks() > 4)
408f95ee607SMichael Berg     return;
409f95ee607SMichael Berg 
410f95ee607SMichael Berg   // Don't unroll vectorized loops, including the remainder loop
411f95ee607SMichael Berg   if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
412f95ee607SMichael Berg     return;
413f95ee607SMichael Berg 
414f95ee607SMichael Berg   // Scan the loop: don't unroll loops with calls as this could prevent
415f95ee607SMichael Berg   // inlining.
416f95ee607SMichael Berg   InstructionCost Cost = 0;
417f95ee607SMichael Berg   for (auto *BB : L->getBlocks()) {
418f95ee607SMichael Berg     for (auto &I : *BB) {
419f95ee607SMichael Berg       // Initial setting - Don't unroll loops containing vectorized
420f95ee607SMichael Berg       // instructions.
421f95ee607SMichael Berg       if (I.getType()->isVectorTy())
422f95ee607SMichael Berg         return;
423f95ee607SMichael Berg 
424f95ee607SMichael Berg       if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
425f95ee607SMichael Berg         if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
426f95ee607SMichael Berg           if (!isLoweredToCall(F))
427f95ee607SMichael Berg             continue;
428f95ee607SMichael Berg         }
429f95ee607SMichael Berg         return;
430f95ee607SMichael Berg       }
431f95ee607SMichael Berg 
432f95ee607SMichael Berg       SmallVector<const Value *> Operands(I.operand_values());
433f95ee607SMichael Berg       Cost +=
434f95ee607SMichael Berg           getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
435f95ee607SMichael Berg     }
436f95ee607SMichael Berg   }
437f95ee607SMichael Berg 
438f95ee607SMichael Berg   LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");
439f95ee607SMichael Berg 
440f95ee607SMichael Berg   UP.Partial = true;
441f95ee607SMichael Berg   UP.Runtime = true;
442f95ee607SMichael Berg   UP.UnrollRemainder = true;
443f95ee607SMichael Berg   UP.UnrollAndJam = true;
444f95ee607SMichael Berg   UP.UnrollAndJamInnerLoopThreshold = 60;
445f95ee607SMichael Berg 
446f95ee607SMichael Berg   // Force unrolling small loops can be very useful because of the branch
447f95ee607SMichael Berg   // taken cost of the backedge.
448f95ee607SMichael Berg   if (Cost < 12)
449f95ee607SMichael Berg     UP.Force = true;
450f95ee607SMichael Berg }
451f95ee607SMichael Berg 
// Peeling preferences: no RISC-V-specific tuning yet; defer entirely to the
// base implementation.
void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}
456cc35161dSKito Cheng 
getRegUsageForType(Type * Ty)457ade47bdcSPeter Waller unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty) {
458cc35161dSKito Cheng   TypeSize Size = Ty->getPrimitiveSizeInBits();
459cc35161dSKito Cheng   if (Ty->isVectorTy()) {
460cc35161dSKito Cheng     if (Size.isScalable() && ST->hasVInstructions())
461cc35161dSKito Cheng       return divideCeil(Size.getKnownMinValue(), RISCV::RVVBitsPerBlock);
462cc35161dSKito Cheng 
463cc35161dSKito Cheng     if (ST->useRVVForFixedLengthVectors())
464f1b1bcdbSPhilip Reames       return divideCeil(Size, ST->getRealMinVLen());
465cc35161dSKito Cheng   }
466cc35161dSKito Cheng 
467cc35161dSKito Cheng   return BaseT::getRegUsageForType(Ty);
468cc35161dSKito Cheng }
469