//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides the implementation of a basic TargetTransformInfo pass
/// predicated on the target abstractions present in the target independent
/// code generator. It uses these (primarily TargetLowering) to model as much
/// of the TTI query interface as possible. It is included by most targets so
/// that they can specialize only a small subset of the query space.
///
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <utility>
using namespace llvm;

// Command-line override for the partial-unrolling op-count limit; when given
// (getNumOccurrences() > 0) it takes precedence over the subtarget's
// LoopMicroOpBufferSize in getUnrollingPreferences() below.
static cl::opt<unsigned>
PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0),
  cl::desc("Threshold for partial unrolling"), cl::Hidden);

#define DEBUG_TYPE "basictti"

namespace {

/// Basic, target-independent implementation of TargetTransformInfo that
/// answers cost-model queries by consulting TargetLoweringBase legalization
/// and operation-action tables. Registered as the default member of the TTI
/// analysis group (see INITIALIZE_AG_PASS below).
class BasicTTI final : public ImmutablePass, public TargetTransformInfo {
  const TargetMachine *TM; // Never null after the TM-taking ctor runs.

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the result needs to be inserted and/or extracted from vectors.
  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

  /// Estimate the cost overhead of SK_Alternate shuffle.
  unsigned getAltShuffleOverhead(Type *Ty) const;

  const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); }

public:
  // Default ctor exists only to satisfy pass machinery; this pass must be
  // created via createBasicTargetTransformInfoPass() with a TargetMachine.
  BasicTTI() : ImmutablePass(ID), TM(nullptr) {
    llvm_unreachable("This pass cannot be directly constructed");
  }

  BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) {
    initializeBasicTTIPass(*PassRegistry::getPassRegistry());
  }

  // Push this implementation onto the TTI delegation stack so queries made
  // through TopTTI reach it (after any target-specific TTI above it).
  void initializePass() override {
    pushTTIStack(this);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    TargetTransformInfo::getAnalysisUsage(AU);
  }

  /// Pass identification.
  static char ID;

  /// Provide necessary pointer adjustments for the two base classes.
  void *getAdjustedAnalysisPointer(const void *ID) override {
    if (ID == &TargetTransformInfo::ID)
      return (TargetTransformInfo*)this;
    return this;
  }

  bool hasBranchDivergence() const override;

  /// \name Scalar TTI Implementations
  /// @{

  bool isLegalAddImmediate(int64_t imm) const override;
  bool isLegalICmpImmediate(int64_t imm) const override;
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                             int64_t BaseOffset, bool HasBaseReg,
                             int64_t Scale) const override;
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                           int64_t BaseOffset, bool HasBaseReg,
                           int64_t Scale) const override;
  bool isTruncateFree(Type *Ty1, Type *Ty2) const override;
  bool isTypeLegal(Type *Ty) const override;
  unsigned getJumpBufAlignment() const override;
  unsigned getJumpBufSize() const override;
  bool shouldBuildLookupTables() const override;
  bool haveFastSqrt(Type *Ty) const override;
  void getUnrollingPreferences(Loop *L,
                               UnrollingPreferences &UP) const override;

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getNumberOfRegisters(bool Vector) const override;
  unsigned getMaximumUnrollFactor() const override;
  unsigned getRegisterBitWidth(bool Vector) const override;
  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind,
                                  OperandValueKind) const override;
  unsigned getShuffleCost(ShuffleKind Kind, Type *Tp,
                          int Index, Type *SubTp) const override;
  unsigned getCastInstrCost(unsigned Opcode, Type *Dst,
                            Type *Src) const override;
  unsigned getCFInstrCost(unsigned Opcode) const override;
  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                              Type *CondTy) const override;
  unsigned getVectorInstrCost(unsigned Opcode, Type *Val,
                              unsigned Index) const override;
  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace) const override;
  unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy,
                                 ArrayRef<Type*> Tys) const override;
  unsigned getNumberOfParts(Type *Tp) const override;
  unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const override;
  unsigned getReductionCost(unsigned Opcode, Type *Ty,
                            bool IsPairwise) const override;

  /// @}
};

}

INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti",
                   "Target independent code generator's TTI", true, true, false)
char BasicTTI::ID = 0;

ImmutablePass *
llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) {
  return new BasicTTI(TM);
}

// Conservative default: assume a uniform (non-divergent) execution model.
bool BasicTTI::hasBranchDivergence() const { return false; }

bool BasicTTI::isLegalAddImmediate(int64_t imm) const {
  return getTLI()->isLegalAddImmediate(imm);
}

bool BasicTTI::isLegalICmpImmediate(int64_t imm) const {
  return getTLI()->isLegalICmpImmediate(imm);
}

// Translate the IR-level (BaseGV + BaseOffset + BaseReg + Scale*Reg) query
// into a TargetLoweringBase::AddrMode and defer to the target.
bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                     int64_t BaseOffset, bool HasBaseReg,
                                     int64_t Scale) const {
  TargetLoweringBase::AddrMode AM;
  AM.BaseGV = BaseGV;
  AM.BaseOffs = BaseOffset;
  AM.HasBaseReg = HasBaseReg;
  AM.Scale = Scale;
  return getTLI()->isLegalAddressingMode(AM, Ty);
}

// Same AddrMode translation as isLegalAddressingMode, but asks the target
// for the relative cost of materializing the scale factor.
int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                   int64_t BaseOffset, bool HasBaseReg,
                                   int64_t Scale) const {
  TargetLoweringBase::AddrMode AM;
  AM.BaseGV = BaseGV;
  AM.BaseOffs = BaseOffset;
  AM.HasBaseReg = HasBaseReg;
  AM.Scale = Scale;
  return getTLI()->getScalingFactorCost(AM, Ty);
}
bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return getTLI()->isTruncateFree(Ty1, Ty2);
}

// A type is "legal" here if its target value type passes the target's
// isTypeLegal check (i.e. it maps to a native register class).
bool BasicTTI::isTypeLegal(Type *Ty) const {
  EVT T = getTLI()->getValueType(Ty);
  return getTLI()->isTypeLegal(T);
}

unsigned BasicTTI::getJumpBufAlignment() const {
  return getTLI()->getJumpBufAlignment();
}

unsigned BasicTTI::getJumpBufSize() const {
  return getTLI()->getJumpBufSize();
}

// Lookup tables are only worthwhile if the target supports jump tables and
// can lower either BR_JT or an indirect branch to implement them.
bool BasicTTI::shouldBuildLookupTables() const {
  const TargetLoweringBase *TLI = getTLI();
  return TLI->supportJumpTables() &&
      (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
       TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other));
}

// sqrt is "fast" when the type is legal and FSQRT lowers without expansion.
bool BasicTTI::haveFastSqrt(Type *Ty) const {
  const TargetLoweringBase *TLI = getTLI();
  EVT VT = TLI->getValueType(Ty);
  return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
}

/// Enable partial/runtime unrolling of call-free loops, sized to the
/// subtarget's loop micro-op buffer (or the -partial-unrolling-threshold
/// override). Leaves UP untouched when no limit is available or the loop
/// contains a real call.
void BasicTTI::getUnrollingPreferences(Loop *L,
                                       UnrollingPreferences &UP) const {
  // This unrolling functionality is target independent, but to provide some
  // motivation for its intended use, for x86:

  // According to the Intel 64 and IA-32 Architectures Optimization Reference
  // Manual, Intel Core models and later have a loop stream detector
  // (and associated uop queue) that can benefit from partial unrolling.
  // The relevant requirements are:
  //  - The loop must have no more than 4 (8 for Nehalem and later) branches
  //    taken, and none of them may be calls.
  //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

  // According to the Software Optimization Guide for AMD Family 15h Processors,
  // models 30h-4fh (Steamroller and later) have a loop predictor and loop
  // buffer which can benefit from partial unrolling.
  // The relevant requirements are:
  //  - The loop must have fewer than 16 branches
  //  - The loop must have less than 40 uops in all executed loop branches

  // The number of taken branches in a loop is hard to estimate here, and
  // benchmarking has revealed that it is better not to be conservative when
  // estimating the branch count. As a result, we'll ignore the branch limits
  // until someone finds a case where it matters in practice.

  unsigned MaxOps;
  const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>();
  if (PartialUnrollingThreshold.getNumOccurrences() > 0)
    MaxOps = PartialUnrollingThreshold;
  else if (ST->getSchedModel()->LoopMicroOpBufferSize > 0)
    MaxOps = ST->getSchedModel()->LoopMicroOpBufferSize;
  else
    return; // No buffer size known: leave the defaults alone.

  // Scan the loop: don't unroll loops with calls.
  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
       I != E; ++I) {
    BasicBlock *BB = *I;

    for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J)
      if (isa<CallInst>(J) || isa<InvokeInst>(J)) {
        ImmutableCallSite CS(J);
        if (const Function *F = CS.getCalledFunction()) {
          // Intrinsics and other calls that fold away (per the top of the
          // TTI stack) don't count as calls for this purpose.
          if (!TopTTI->isLoweredToCall(F))
            continue;
        }

        return; // A real call: don't unroll.
      }
  }

  // Enable runtime and partial unrolling up to the specified size.
  UP.Partial = UP.Runtime = true;
  UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps;
}

//===----------------------------------------------------------------------===//
//
// Calls used by the vectorizers.
//
//===----------------------------------------------------------------------===//
unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert,
                                            bool Extract) const {
  assert (Ty->isVectorTy() && "Can only scalarize vectors");
  unsigned Cost = 0;

  // One insert and/or extract per vector lane.
  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
    if (Insert)
      Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
    if (Extract)
      Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
  }

  return Cost;
}

// Target-independent defaults; real targets override these via their own TTI.
unsigned BasicTTI::getNumberOfRegisters(bool Vector) const {
  return 1;
}

unsigned BasicTTI::getRegisterBitWidth(bool Vector) const {
  return 32;
}

unsigned BasicTTI::getMaximumUnrollFactor() const {
  return 1;
}

/// Cost of an arithmetic instruction: scaled by type-legalization splitting,
/// doubled for floating point and for custom-lowered operations, and fully
/// scalarized (per-lane cost + insert/extract overhead) when expanded.
/// The OperandValueKind hints are ignored in this basic implementation.
unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                          OperandValueKind,
                                          OperandValueKind) const {
  // Check if any of the operands are vector operands.
  const TargetLoweringBase *TLI = getTLI();
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty);

  bool IsFloat = Ty->getScalarType()->isFloatingPointTy();
  // Assume that floating point arithmetic operations cost twice as much as
  // integer operations.
  unsigned OpCost = (IsFloat ? 2 : 1);

  if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
    // The operation is legal. Assume it costs 1.
    // If the type is split to multiple registers, assume that there is some
    // overhead to this.
    // TODO: Once we have extract/insert subvector cost we need to use them.
    if (LT.first > 1)
      return LT.first * 2 * OpCost;
    return LT.first * 1 * OpCost;
  }

  if (!TLI->isOperationExpand(ISD, LT.second)) {
    // If the operation is custom lowered then assume
    // that the code is twice as expensive.
    return LT.first * 2 * OpCost;
  }

  // Else, assume that we need to scalarize this op.
  if (Ty->isVectorTy()) {
    unsigned Num = Ty->getVectorNumElements();
    unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType());
    // return the cost of multiple scalar invocation plus the cost of inserting
    // and extracting the values.
    return getScalarizationOverhead(Ty, true, true) + Num * Cost;
  }

  // We don't know anything about this scalar instruction.
  return OpCost;
}

unsigned BasicTTI::getAltShuffleOverhead(Type *Ty) const {
  assert(Ty->isVectorTy() && "Can only shuffle vectors");
  unsigned Cost = 0;
  // Shuffle cost is equal to the cost of extracting element from its argument
  // plus the cost of inserting them onto the result vector.

  // e.g. <4 x float> has a mask of <0,5,2,7> i.e we need to extract from index
  // 0 of first vector, index 1 of second vector,index 2 of first vector and
  // finally index 3 of second vector and insert them at index <0,1,2,3> of
  // result vector.
  for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) {
    Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
    Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
  }
  return Cost;
}

// Default shuffle cost: SK_Alternate is modeled per-lane; every other kind
// is optimistically assumed to be a single instruction.
unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                                  Type *SubTp) const {
  if (Kind == SK_Alternate) {
    return getAltShuffleOverhead(Tp);
  }
  return 1;
}

/// Cost of a cast/conversion: free for no-op bitcasts/truncs between
/// identically-legalized types, cheap when legal-or-promote, and scalarized
/// (per-lane conversion + insert/extract) for illegal vector conversions.
unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst,
                                    Type *Src) const {
  const TargetLoweringBase *TLI = getTLI();
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src);
  std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst);

  // Check for NOOP conversions.
  if (SrcLT.first == DstLT.first &&
      SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {

    // Bitcast between types that are legalized to the same type are free.
    if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc)
      return 0;
  }

  if (Opcode == Instruction::Trunc &&
      TLI->isTruncateFree(SrcLT.second, DstLT.second))
    return 0;

  if (Opcode == Instruction::ZExt &&
      TLI->isZExtFree(SrcLT.second, DstLT.second))
    return 0;

  // If the cast is marked as legal (or promote) then assume low cost.
  if (SrcLT.first == DstLT.first &&
      TLI->isOperationLegalOrPromote(ISD, DstLT.second))
    return 1;

  // Handle scalar conversions.
  if (!Src->isVectorTy() && !Dst->isVectorTy()) {

    // Scalar bitcasts are usually free.
    if (Opcode == Instruction::BitCast)
      return 0;

    // Just check the op cost. If the operation is legal then assume it costs 1.
    if (!TLI->isOperationExpand(ISD, DstLT.second))
      return 1;

    // Assume that illegal scalar instruction are expensive.
    return 4;
  }

  // Check vector-to-vector casts.
  if (Dst->isVectorTy() && Src->isVectorTy()) {

    // If the cast is between same-sized registers, then the check is simple.
    if (SrcLT.first == DstLT.first &&
        SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) {

      // Assume that Zext is done using AND.
      if (Opcode == Instruction::ZExt)
        return 1;

      // Assume that sext is done using SHL and SRA.
      if (Opcode == Instruction::SExt)
        return 2;

      // Just check the op cost. If the operation is legal then assume it costs
      // 1 and multiply by the type-legalization overhead.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return SrcLT.first * 1;
    }

    // If we are converting vectors and the operation is illegal, or
    // if the vectors are legalized to different types, estimate the
    // scalarization costs.
    unsigned Num = Dst->getVectorNumElements();
    unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(),
                                             Src->getScalarType());

    // Return the cost of multiple scalar invocation plus the cost of
    // inserting and extracting the values.
    return getScalarizationOverhead(Dst, true, true) + Num * Cost;
  }

  // We already handled vector-to-vector and scalar-to-scalar conversions. This
  // is where we handle bitcast between vectors and scalars. We need to assume
  // that the conversion is scalarized in one way or another.
  if (Opcode == Instruction::BitCast)
    // Illegal bitcasts are done by storing and loading from a stack slot.
    return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) +
           (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0);

  llvm_unreachable("Unhandled cast");
}

unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const {
  // Branches are assumed to be predicted.
  return 0;
}

/// Cost of a compare or select. Vector selects are remapped to VSELECT before
/// consulting the target; illegal vector types are scalarized per lane.
unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                      Type *CondTy) const {
  const TargetLoweringBase *TLI = getTLI();
  int ISD = TLI->InstructionOpcodeToISD(Opcode);
  assert(ISD && "Invalid opcode");

  // Selects on vectors are actually vector selects.
  if (ISD == ISD::SELECT) {
    assert(CondTy && "CondTy must exist");
    if (CondTy->isVectorTy())
      ISD = ISD::VSELECT;
  }

  std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy);

  if (!TLI->isOperationExpand(ISD, LT.second)) {
    // The operation is legal. Assume it costs 1. Multiply
    // by the type-legalization overhead.
    return LT.first * 1;
  }

  // Otherwise, assume that the cast is scalarized.
  if (ValTy->isVectorTy()) {
    unsigned Num = ValTy->getVectorNumElements();
    if (CondTy)
      CondTy = CondTy->getScalarType();
    unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(),
                                               CondTy);

    // Return the cost of multiple scalar invocation plus the cost of inserting
    // and extracting the values.
    return getScalarizationOverhead(ValTy, true, false) + Num * Cost;
  }

  // Unknown scalar opcode.
  return 1;
}

// Cost of a single element insert/extract: just the legalization split count
// of the element (scalar) type.
unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val,
                                      unsigned Index) const {
  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Val->getScalarType());

  return LT.first;
}

/// Cost of a load/store: legalization split count, plus full scalarization
/// overhead for vector accesses that would need an extending load or
/// truncating store the target cannot lower.
unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src,
                                   unsigned Alignment,
                                   unsigned AddressSpace) const {
  assert(!Src->isVoidTy() && "Invalid type");
  std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src);

  // Assuming that all loads of legal types cost 1.
  unsigned Cost = LT.first;

  if (Src->isVectorTy() &&
      Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) {
    // This is a vector load that legalizes to a larger type than the vector
    // itself. Unless the corresponding extending load or truncating store is
    // legal, then this will scalarize.
    TargetLowering::LegalizeAction LA = TargetLowering::Expand;
    EVT MemVT = getTLI()->getValueType(Src, true);
    if (MemVT.isSimple() && MemVT != MVT::Other) {
      if (Opcode == Instruction::Store)
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT());
      else
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT.getSimpleVT());
    }

    if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
      // This is a vector load/store for some illegal type that is scalarized.
      // We must account for the cost of building or decomposing the vector.
      Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store,
                                       Opcode == Instruction::Store);
    }
  }

  return Cost;
}

unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy,
                                         ArrayRef<Type *> Tys) const {
  unsigned ISD = 0;
  switch (IID) {
  default: {
    // Assume that we need to scalarize this intrinsic.
    unsigned ScalarizationCost = 0;
    unsigned ScalarCalls = 1;
    if (RetTy->isVectorTy()) {
      ScalarizationCost = getScalarizationOverhead(RetTy, true, false);
      ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
    }
    for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
      if (Tys[i]->isVectorTy()) {
        ScalarizationCost += getScalarizationOverhead(Tys[i], false, true);
        // NOTE(review): this reads RetTy's element count while iterating the
        // argument types; looks like it should be Tys[i]->getVectorNumElements()
        // — confirm against upstream before changing.
        ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements());
      }
    }

    return ScalarCalls + ScalarizationCost;
  }
  // Look for intrinsics that can be lowered directly or turned into a scalar
  // intrinsic call.
555139f7f9bSDimitry Andric case Intrinsic::sqrt: ISD = ISD::FSQRT; break; 556139f7f9bSDimitry Andric case Intrinsic::sin: ISD = ISD::FSIN; break; 557139f7f9bSDimitry Andric case Intrinsic::cos: ISD = ISD::FCOS; break; 558139f7f9bSDimitry Andric case Intrinsic::exp: ISD = ISD::FEXP; break; 559139f7f9bSDimitry Andric case Intrinsic::exp2: ISD = ISD::FEXP2; break; 560139f7f9bSDimitry Andric case Intrinsic::log: ISD = ISD::FLOG; break; 561139f7f9bSDimitry Andric case Intrinsic::log10: ISD = ISD::FLOG10; break; 562139f7f9bSDimitry Andric case Intrinsic::log2: ISD = ISD::FLOG2; break; 563139f7f9bSDimitry Andric case Intrinsic::fabs: ISD = ISD::FABS; break; 564f785676fSDimitry Andric case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; 565139f7f9bSDimitry Andric case Intrinsic::floor: ISD = ISD::FFLOOR; break; 566139f7f9bSDimitry Andric case Intrinsic::ceil: ISD = ISD::FCEIL; break; 567139f7f9bSDimitry Andric case Intrinsic::trunc: ISD = ISD::FTRUNC; break; 568f785676fSDimitry Andric case Intrinsic::nearbyint: 569f785676fSDimitry Andric ISD = ISD::FNEARBYINT; break; 570139f7f9bSDimitry Andric case Intrinsic::rint: ISD = ISD::FRINT; break; 571f785676fSDimitry Andric case Intrinsic::round: ISD = ISD::FROUND; break; 572139f7f9bSDimitry Andric case Intrinsic::pow: ISD = ISD::FPOW; break; 573139f7f9bSDimitry Andric case Intrinsic::fma: ISD = ISD::FMA; break; 574*91bc56edSDimitry Andric case Intrinsic::fmuladd: ISD = ISD::FMA; break; 575f785676fSDimitry Andric case Intrinsic::lifetime_start: 576f785676fSDimitry Andric case Intrinsic::lifetime_end: 577f785676fSDimitry Andric return 0; 578139f7f9bSDimitry Andric } 579139f7f9bSDimitry Andric 580f785676fSDimitry Andric const TargetLoweringBase *TLI = getTLI(); 581139f7f9bSDimitry Andric std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy); 582139f7f9bSDimitry Andric 583139f7f9bSDimitry Andric if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { 584139f7f9bSDimitry Andric // The operation is legal. Assume it costs 1. 
585139f7f9bSDimitry Andric // If the type is split to multiple registers, assume that thre is some 586139f7f9bSDimitry Andric // overhead to this. 587139f7f9bSDimitry Andric // TODO: Once we have extract/insert subvector cost we need to use them. 588139f7f9bSDimitry Andric if (LT.first > 1) 589139f7f9bSDimitry Andric return LT.first * 2; 590139f7f9bSDimitry Andric return LT.first * 1; 591139f7f9bSDimitry Andric } 592139f7f9bSDimitry Andric 593139f7f9bSDimitry Andric if (!TLI->isOperationExpand(ISD, LT.second)) { 594139f7f9bSDimitry Andric // If the operation is custom lowered then assume 595139f7f9bSDimitry Andric // thare the code is twice as expensive. 596139f7f9bSDimitry Andric return LT.first * 2; 597139f7f9bSDimitry Andric } 598139f7f9bSDimitry Andric 599*91bc56edSDimitry Andric // If we can't lower fmuladd into an FMA estimate the cost as a floating 600*91bc56edSDimitry Andric // point mul followed by an add. 601*91bc56edSDimitry Andric if (IID == Intrinsic::fmuladd) 602*91bc56edSDimitry Andric return TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + 603*91bc56edSDimitry Andric TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); 604*91bc56edSDimitry Andric 605139f7f9bSDimitry Andric // Else, assume that we need to scalarize this intrinsic. For math builtins 606139f7f9bSDimitry Andric // this will emit a costly libcall, adding call overhead and spills. Make it 607139f7f9bSDimitry Andric // very expensive. 608139f7f9bSDimitry Andric if (RetTy->isVectorTy()) { 609139f7f9bSDimitry Andric unsigned Num = RetTy->getVectorNumElements(); 610139f7f9bSDimitry Andric unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(), 611139f7f9bSDimitry Andric Tys); 612139f7f9bSDimitry Andric return 10 * Cost * Num; 613139f7f9bSDimitry Andric } 614139f7f9bSDimitry Andric 615139f7f9bSDimitry Andric // This is going to be turned into a library call, make it expensive. 
616139f7f9bSDimitry Andric return 10; 617139f7f9bSDimitry Andric } 618139f7f9bSDimitry Andric 619139f7f9bSDimitry Andric unsigned BasicTTI::getNumberOfParts(Type *Tp) const { 620f785676fSDimitry Andric std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp); 621139f7f9bSDimitry Andric return LT.first; 622139f7f9bSDimitry Andric } 623139f7f9bSDimitry Andric 624f785676fSDimitry Andric unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { 625139f7f9bSDimitry Andric return 0; 626139f7f9bSDimitry Andric } 627f785676fSDimitry Andric 628f785676fSDimitry Andric unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty, 629f785676fSDimitry Andric bool IsPairwise) const { 630f785676fSDimitry Andric assert(Ty->isVectorTy() && "Expect a vector type"); 631f785676fSDimitry Andric unsigned NumVecElts = Ty->getVectorNumElements(); 632f785676fSDimitry Andric unsigned NumReduxLevels = Log2_32(NumVecElts); 633f785676fSDimitry Andric unsigned ArithCost = NumReduxLevels * 634f785676fSDimitry Andric TopTTI->getArithmeticInstrCost(Opcode, Ty); 635f785676fSDimitry Andric // Assume the pairwise shuffles add a cost. 636f785676fSDimitry Andric unsigned ShuffleCost = 637f785676fSDimitry Andric NumReduxLevels * (IsPairwise + 1) * 638f785676fSDimitry Andric TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty); 639f785676fSDimitry Andric return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); 640f785676fSDimitry Andric } 641