//===-- RISCVTargetTransformInfo.cpp - RISC-V specific TTI ----------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RISCVTargetTransformInfo.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/CodeGen/BasicTTIImpl.h"
#include "llvm/CodeGen/TargetLowering.h"
#include <cmath>
using namespace llvm;

#define DEBUG_TYPE "riscvtti"

static cl::opt<unsigned> RVVRegisterWidthLMUL(
    "riscv-v-register-bit-width-lmul",
    cl::desc(
        "The LMUL to use for getRegisterBitWidth queries. Affects LMUL used "
        "by autovectorized code. Fractional LMULs are not supported."),
    cl::init(1), cl::Hidden);

InstructionCost RISCVTTIImpl::getIntImmCost(const APInt &Imm, Type *Ty,
                                            TTI::TargetCostKind CostKind) {
  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");

  // We have a Zero register, so 0 is always free.
  if (Imm == 0)
    return TTI::TCC_Free;

  // Otherwise, we check how many instructions it will take to materialise the
  // immediate.
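  // Illustrative example (not computed here): on RV64 a constant such as
  // 0x12345678 is typically materialised as lui+addi, i.e. two instructions;
  // RISCVMatInt::getIntMatCost below computes the authoritative count for the
  // subtarget's features.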
  const DataLayout &DL = getDataLayout();
  return RISCVMatInt::getIntMatCost(Imm, DL.getTypeSizeInBits(Ty),
                                    getST()->getFeatureBits());
}

InstructionCost RISCVTTIImpl::getIntImmCostInst(unsigned Opcode, unsigned Idx,
                                                const APInt &Imm, Type *Ty,
                                                TTI::TargetCostKind CostKind,
                                                Instruction *Inst) {
  assert(Ty->isIntegerTy() &&
         "getIntImmCost can only estimate cost of materialising integers");

  // We have a Zero register, so 0 is always free.
  if (Imm == 0)
    return TTI::TCC_Free;

  // Some instructions in RISC-V can take a 12-bit immediate. Some of these
  // are commutative; for others the immediate must come from a specific
  // argument index.
  bool Takes12BitImm = false;
  unsigned ImmArgIdx = ~0U;

  switch (Opcode) {
  case Instruction::GetElementPtr:
    // Never hoist any arguments to a GetElementPtr. CodeGenPrepare will
    // split up large offsets in GEP into better parts than ConstantHoisting
    // can.
    return TTI::TCC_Free;
  case Instruction::And:
    // zext.h
    if (Imm == UINT64_C(0xffff) && ST->hasStdExtZbb())
      return TTI::TCC_Free;
    // zext.w
    if (Imm == UINT64_C(0xffffffff) && ST->hasStdExtZba())
      return TTI::TCC_Free;
    LLVM_FALLTHROUGH;
  case Instruction::Add:
  case Instruction::Or:
  case Instruction::Xor:
  case Instruction::Mul:
    Takes12BitImm = true;
    break;
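  // For the following opcodes the immediate form exists only when the
  // constant is the second operand: shifts take the shift amount as an
  // immediate, and x - imm can be lowered to addi with a negated immediate.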
  case Instruction::Sub:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
    Takes12BitImm = true;
    ImmArgIdx = 1;
    break;
  default:
    break;
  }

  if (Takes12BitImm) {
    // Check immediate is the correct argument...
    if (Instruction::isCommutative(Opcode) || Idx == ImmArgIdx) {
      // ... and fits into the 12-bit immediate.
      if (Imm.getMinSignedBits() <= 64 &&
          getTLI()->isLegalAddImmediate(Imm.getSExtValue())) {
        return TTI::TCC_Free;
      }
    }

    // Otherwise, use the full materialisation cost.
    return getIntImmCost(Imm, Ty, CostKind);
  }

  // By default, prevent hoisting.
  return TTI::TCC_Free;
}

InstructionCost
RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
                                  const APInt &Imm, Type *Ty,
                                  TTI::TargetCostKind CostKind) {
  // Prevent hoisting in unknown cases.
  return TTI::TCC_Free;
}

TargetTransformInfo::PopcntSupportKind
RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) {
  assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2");
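  // Zbb provides the cpop/cpopw instructions, so popcount is fast in hardware
  // when the extension is available.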
  return ST->hasStdExtZbb() ? TTI::PSK_FastHardware : TTI::PSK_Software;
}

bool RISCVTTIImpl::shouldExpandReduction(const IntrinsicInst *II) const {
  // Currently, the ExpandReductions pass can't expand scalable-vector
  // reductions, but we still request expansion as RVV doesn't support certain
  // reductions and the SelectionDAG can't legalize them either.
  switch (II->getIntrinsicID()) {
  default:
    return false;
  // These reductions have no equivalent in RVV.
  case Intrinsic::vector_reduce_mul:
  case Intrinsic::vector_reduce_fmul:
    return true;
  }
}

Optional<unsigned> RISCVTTIImpl::getMaxVScale() const {
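  // vscale equals VLEN / RISCV::RVVBitsPerBlock, so the maximum vscale follows
  // directly from the largest VLEN the subtarget can have.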
  if (ST->hasVInstructions())
    return ST->getRealMaxVLen() / RISCV::RVVBitsPerBlock;
  return BaseT::getMaxVScale();
}

Optional<unsigned> RISCVTTIImpl::getVScaleForTuning() const {
  if (ST->hasVInstructions())
    return ST->getRealMinVLen() / RISCV::RVVBitsPerBlock;
  return BaseT::getVScaleForTuning();
}

TypeSize
RISCVTTIImpl::getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const {
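  // Clamp the requested LMUL to [1, 8] and round down to a power of two;
  // e.g. a requested LMUL of 6 is treated as 4.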
  unsigned LMUL = PowerOf2Floor(
      std::max<unsigned>(std::min<unsigned>(RVVRegisterWidthLMUL, 8), 1));
  switch (K) {
  case TargetTransformInfo::RGK_Scalar:
    return TypeSize::getFixed(ST->getXLen());
  case TargetTransformInfo::RGK_FixedWidthVector:
    return TypeSize::getFixed(
        ST->useRVVForFixedLengthVectors() ? LMUL * ST->getRealMinVLen() : 0);
  case TargetTransformInfo::RGK_ScalableVector:
    return TypeSize::getScalable(
        ST->hasVInstructions() ? LMUL * RISCV::RVVBitsPerBlock : 0);
  }

  llvm_unreachable("Unsupported register kind");
}

InstructionCost RISCVTTIImpl::getSpliceCost(VectorType *Tp, int Index) {
  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);

  unsigned Cost = 2; // vslidedown+vslideup.
  // TODO: LMUL should increase cost.
  // TODO: Multiplying by LT.first implies this legalizes into multiple copies
  // of similar code, but I think we expand through memory.
  return Cost * LT.first;
}

InstructionCost RISCVTTIImpl::getShuffleCost(TTI::ShuffleKind Kind,
                                             VectorType *Tp, ArrayRef<int> Mask,
                                             int Index, VectorType *SubTp,
                                             ArrayRef<const Value *> Args) {
  if (isa<ScalableVectorType>(Tp)) {
    std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Tp);
    switch (Kind) {
    default:
      // Fallthrough to generic handling.
      // TODO: Most of these cases will return getInvalid in generic code, and
      // must be implemented here.
      break;
    case TTI::SK_Broadcast:
      return LT.first * 1;
    case TTI::SK_Splice:
      return getSpliceCost(Tp, Index);
    case TTI::SK_Reverse:
      // Most of the cost here is producing the vrgather index register.
      // Example sequence:
      //   csrr a0, vlenb
      //   srli a0, a0, 3
      //   addi a0, a0, -1
      //   vsetvli a1, zero, e8, mf8, ta, mu (ignored)
      //   vid.v v9
      //   vrsub.vx v10, v9, a0
      //   vrgather.vv v9, v8, v10
      if (Tp->getElementType()->isIntegerTy(1))
        // Mask vectors additionally require an extend before and a truncate
        // after the gather.
        return LT.first * 9;
      return LT.first * 6;
    }
  }

  return BaseT::getShuffleCost(Kind, Tp, Mask, Index, SubTp);
}

InstructionCost
RISCVTTIImpl::getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
                                    unsigned AddressSpace,
                                    TTI::TargetCostKind CostKind) {
  if (!isa<ScalableVectorType>(Src))
    return BaseT::getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
                                        CostKind);

  return getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, CostKind);
}

InstructionCost RISCVTTIImpl::getGatherScatterOpCost(
    unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
    Align Alignment, TTI::TargetCostKind CostKind, const Instruction *I) {
  if (CostKind != TTI::TCK_RecipThroughput)
    return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);

  if ((Opcode == Instruction::Load &&
       !isLegalMaskedGather(DataTy, Align(Alignment))) ||
      (Opcode == Instruction::Store &&
       !isLegalMaskedScatter(DataTy, Align(Alignment))))
    return BaseT::getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                         Alignment, CostKind, I);

  // Cost is proportional to the number of memory operations implied. For
  // scalable vectors, we use an upper bound on that number since we don't
  // know exactly what VL will be.
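  // For example, a gather of <vscale x 4 x i32> on a subtarget whose maximum
  // VLEN is 512 is charged as up to 32 scalar loads.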
  auto &VTy = *cast<VectorType>(DataTy);
  InstructionCost MemOpCost = getMemoryOpCost(Opcode, VTy.getElementType(),
                                              Alignment, 0, CostKind, I);
  unsigned NumLoads = getMaxVLFor(&VTy);
  return NumLoads * MemOpCost;
}

InstructionCost
RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                    TTI::TargetCostKind CostKind) {
  auto *RetTy = ICA.getReturnType();
  switch (ICA.getID()) {
  // TODO: Add more intrinsics.
  case Intrinsic::experimental_stepvector: {
    unsigned Cost = 1; // vid
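    // A legal type needs a single vid.v; if legalization splits the type into
    // LT.first parts, each additional part is charged one more instruction.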
    auto LT = TLI->getTypeLegalizationCost(DL, RetTy);
    return Cost + (LT.first - 1);
  }
  default:
    break;
  }
  return BaseT::getIntrinsicInstrCost(ICA, CostKind);
}

InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
                                               Type *Src,
                                               TTI::CastContextHint CCH,
                                               TTI::TargetCostKind CostKind,
                                               const Instruction *I) {
  if (isa<VectorType>(Dst) && isa<VectorType>(Src)) {
    // FIXME: Need to compute legalizing cost for illegal types.
    if (!isTypeLegal(Src) || !isTypeLegal(Dst))
      return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);

    // Skip if the element size of Dst or Src is bigger than ELEN.
    if (Src->getScalarSizeInBits() > ST->getELEN() ||
        Dst->getScalarSizeInBits() > ST->getELEN())
      return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);

    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // FIXME: Need to consider vsetvli and lmul.
    int PowDiff = (int)Log2_32(Dst->getScalarSizeInBits()) -
                  (int)Log2_32(Src->getScalarSizeInBits());
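    // PowDiff is the log2 difference of the element widths; e.g. i16 -> i64
    // gives PowDiff = 2, which the narrowing/widening cases below charge as
    // two instructions.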
    switch (ISD) {
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
      return 1;
    case ISD::TRUNCATE:
    case ISD::FP_EXTEND:
    case ISD::FP_ROUND:
      // Counts of narrow/widen instructions.
      return std::abs(PowDiff);
    case ISD::FP_TO_SINT:
    case ISD::FP_TO_UINT:
    case ISD::SINT_TO_FP:
    case ISD::UINT_TO_FP:
      if (std::abs(PowDiff) <= 1)
        return 1;
      // The backend can lower (v[sz]ext i8 to double) to vfcvt(v[sz]ext.f8
      // i8), so only two conversions are needed.
      if (Src->isIntOrIntVectorTy())
        return 2;
      // Counts of narrow/widen instructions.
      return std::abs(PowDiff);
    }
  }
  return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}

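// Return an upper bound on the number of elements processed per vector
// operation for the given type: VLMAX for scalable vectors, the exact element
// count for fixed-length vectors.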
unsigned RISCVTTIImpl::getMaxVLFor(VectorType *Ty) {
  if (isa<ScalableVectorType>(Ty)) {
    const unsigned EltSize = DL.getTypeSizeInBits(Ty->getElementType());
    const unsigned MinSize = DL.getTypeSizeInBits(Ty).getKnownMinValue();
    const unsigned VectorBitsMax = ST->getRealMaxVLen();
    return RISCVTargetLowering::computeVLMAX(VectorBitsMax, EltSize, MinSize);
  }
  return cast<FixedVectorType>(Ty)->getNumElements();
}

InstructionCost
RISCVTTIImpl::getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
                                     bool IsUnsigned,
                                     TTI::TargetCostKind CostKind) {
  if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);

  // Skip if the scalar size of Ty is bigger than ELEN.
  if (Ty->getScalarSizeInBits() > ST->getELEN())
    return BaseT::getMinMaxReductionCost(Ty, CondTy, IsUnsigned, CostKind);

  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  if (Ty->getElementType()->isIntegerTy(1))
    // vcpop sequences, see vreduction-mask.ll. umax and smin actually only
    // cost 2, but we don't have enough information here, so we slightly
    // overcost.
    return (LT.first - 1) + 3;

  // An IR reduction is lowered to two vmv instructions plus one RVV reduction
  // instruction.
  InstructionCost BaseCost = 2;
  unsigned VL = getMaxVLFor(Ty);
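  // Charge an additional Log2(VL) to approximate the depth of the reduction
  // tree inside the reduction instruction.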
  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
}

InstructionCost
RISCVTTIImpl::getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
                                         Optional<FastMathFlags> FMF,
                                         TTI::TargetCostKind CostKind) {
  if (isa<FixedVectorType>(Ty) && !ST->useRVVForFixedLengthVectors())
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

  // Skip if the scalar size of Ty is bigger than ELEN.
  if (Ty->getScalarSizeInBits() > ST->getELEN())
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  if (ISD != ISD::ADD && ISD != ISD::OR && ISD != ISD::XOR &&
      ISD != ISD::AND && ISD != ISD::FADD)
    return BaseT::getArithmeticReductionCost(Opcode, Ty, FMF, CostKind);

  std::pair<InstructionCost, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);
  if (Ty->getElementType()->isIntegerTy(1))
    // vcpop sequences, see vreduction-mask.ll.
    return (LT.first - 1) + (ISD == ISD::AND ? 3 : 2);

  // An IR reduction is lowered to two vmv instructions plus one RVV reduction
  // instruction.
  InstructionCost BaseCost = 2;
  unsigned VL = getMaxVLFor(Ty);
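  // Ordered (strict) FP reductions must accumulate elements sequentially, so
  // they are charged linearly in VL; unordered reductions get the logarithmic
  // tree-depth estimate instead.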
  if (TTI::requiresOrderedReduction(FMF))
    return (LT.first - 1) + BaseCost + VL;
  return (LT.first - 1) + BaseCost + Log2_32_Ceil(VL);
}

void RISCVTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                                           TTI::UnrollingPreferences &UP,
                                           OptimizationRemarkEmitter *ORE) {
  // TODO: More tuning on benchmarks and metrics with changes as needed
  // would apply to all settings below to enable performance.

  if (ST->enableDefaultUnroll())
    return BasicTTIImplBase::getUnrollingPreferences(L, SE, UP, ORE);

  // Enable upper-bound unrolling universally, not dependent upon the
  // conditions below.
  UP.UpperBound = true;

  // Disable loop unrolling for Oz and Os.
  UP.OptSizeThreshold = 0;
  UP.PartialOptSizeThreshold = 0;
  if (L->getHeader()->getParent()->hasOptSize())
    return;

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);
  LLVM_DEBUG(dbgs() << "Loop has:\n"
                    << "Blocks: " << L->getNumBlocks() << "\n"
                    << "Exit blocks: " << ExitingBlocks.size() << "\n");

  // Allow at most one exit other than the latch. This acts as an early exit
  // as it mirrors the profitability calculation of the runtime unroller.
  if (ExitingBlocks.size() > 2)
    return;

  // Limit the CFG of the loop body for targets with a branch predictor.
  // Allowing 4 blocks permits if-then-else diamonds in the body.
  if (L->getNumBlocks() > 4)
    return;

  // Don't unroll vectorized loops, including the remainder loop.
  if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized"))
    return;

  // Scan the loop: don't unroll loops with calls as this could prevent
  // inlining.
  InstructionCost Cost = 0;
  for (auto *BB : L->getBlocks()) {
    for (auto &I : *BB) {
      // Initial setting - Don't unroll loops containing vectorized
      // instructions.
      if (I.getType()->isVectorTy())
        return;

      if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
        if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
          if (!isLoweredToCall(F))
            continue;
        }
        return;
      }

      SmallVector<const Value *> Operands(I.operand_values());
      Cost +=
          getUserCost(&I, Operands, TargetTransformInfo::TCK_SizeAndLatency);
    }
  }

  LLVM_DEBUG(dbgs() << "Cost of loop: " << Cost << "\n");

  UP.Partial = true;
  UP.Runtime = true;
  UP.UnrollRemainder = true;
  UP.UnrollAndJam = true;
  UP.UnrollAndJamInnerLoopThreshold = 60;

  // Force-unrolling small loops can be very useful because of the branch-taken
  // cost of the backedge.
  if (Cost < 12)
    UP.Force = true;
}

void RISCVTTIImpl::getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                                         TTI::PeelingPreferences &PP) {
  BaseT::getPeelingPreferences(L, SE, PP);
}

unsigned RISCVTTIImpl::getRegUsageForType(Type *Ty) {
  TypeSize Size = Ty->getPrimitiveSizeInBits();
  if (Ty->isVectorTy()) {
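    // A scalable vector occupies one vector register per RVVBitsPerBlock (64)
    // known-minimum bits; e.g. <vscale x 8 x i32> has a known minimum size of
    // 256 bits and thus takes 256 / 64 = 4 registers (an LMUL-4 group).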
    if (Size.isScalable() && ST->hasVInstructions())
      return divideCeil(Size.getKnownMinValue(), RISCV::RVVBitsPerBlock);

    if (ST->useRVVForFixedLengthVectors())
      return divideCeil(Size, ST->getRealMinVLen());
  }

  return BaseT::getRegUsageForType(Ty);
}