//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides the implementation of a basic TargetTransformInfo pass
/// predicated on the target abstractions present in the target independent
/// code generator. It uses these (primarily TargetLowering) to model as much
/// of the TTI query interface as possible. It is included by most targets so
/// that they can specialize only a small subset of the query space.
15664e354dSChandler Carruth /// 16664e354dSChandler Carruth //===----------------------------------------------------------------------===// 17664e354dSChandler Carruth 18664e354dSChandler Carruth #include "llvm/CodeGen/Passes.h" 19*6532c20fSHal Finkel #include "llvm/Analysis/LoopInfo.h" 20d3e73556SChandler Carruth #include "llvm/Analysis/TargetTransformInfo.h" 21*6532c20fSHal Finkel #include "llvm/Support/CommandLine.h" 22664e354dSChandler Carruth #include "llvm/Target/TargetLowering.h" 23*6532c20fSHal Finkel #include "llvm/Target/TargetSubtargetInfo.h" 24664e354dSChandler Carruth #include <utility> 25664e354dSChandler Carruth using namespace llvm; 26664e354dSChandler Carruth 27*6532c20fSHal Finkel static cl::opt<unsigned> 28*6532c20fSHal Finkel PartialUnrollingThreshold("partial-unrolling-threshold", cl::init(0), 29*6532c20fSHal Finkel cl::desc("Threshold for partial unrolling"), cl::Hidden); 30*6532c20fSHal Finkel 311b9dde08SChandler Carruth #define DEBUG_TYPE "basictti" 321b9dde08SChandler Carruth 33664e354dSChandler Carruth namespace { 34664e354dSChandler Carruth 3577dfe45fSCraig Topper class BasicTTI final : public ImmutablePass, public TargetTransformInfo { 36afc1036fSBill Wendling const TargetMachine *TM; 37664e354dSChandler Carruth 38664e354dSChandler Carruth /// Estimate the overhead of scalarizing an instruction. Insert and Extract 39664e354dSChandler Carruth /// are set if the result needs to be inserted and/or extracted from vectors. 
40664e354dSChandler Carruth unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; 41664e354dSChandler Carruth 42afc1036fSBill Wendling const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } 43afc1036fSBill Wendling 44664e354dSChandler Carruth public: 45c0196b1bSCraig Topper BasicTTI() : ImmutablePass(ID), TM(nullptr) { 46664e354dSChandler Carruth llvm_unreachable("This pass cannot be directly constructed"); 47664e354dSChandler Carruth } 48664e354dSChandler Carruth 49afc1036fSBill Wendling BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) { 50664e354dSChandler Carruth initializeBasicTTIPass(*PassRegistry::getPassRegistry()); 51664e354dSChandler Carruth } 52664e354dSChandler Carruth 5324e685fdSCraig Topper void initializePass() override { 54664e354dSChandler Carruth pushTTIStack(this); 55664e354dSChandler Carruth } 56664e354dSChandler Carruth 5724e685fdSCraig Topper void getAnalysisUsage(AnalysisUsage &AU) const override { 58664e354dSChandler Carruth TargetTransformInfo::getAnalysisUsage(AU); 59664e354dSChandler Carruth } 60664e354dSChandler Carruth 61664e354dSChandler Carruth /// Pass identification. 62664e354dSChandler Carruth static char ID; 63664e354dSChandler Carruth 64664e354dSChandler Carruth /// Provide necessary pointer adjustments for the two base classes. 
6524e685fdSCraig Topper void *getAdjustedAnalysisPointer(const void *ID) override { 66664e354dSChandler Carruth if (ID == &TargetTransformInfo::ID) 67664e354dSChandler Carruth return (TargetTransformInfo*)this; 68664e354dSChandler Carruth return this; 69664e354dSChandler Carruth } 70664e354dSChandler Carruth 7124e685fdSCraig Topper bool hasBranchDivergence() const override; 728b1e021eSTom Stellard 73664e354dSChandler Carruth /// \name Scalar TTI Implementations 74664e354dSChandler Carruth /// @{ 75664e354dSChandler Carruth 7624e685fdSCraig Topper bool isLegalAddImmediate(int64_t imm) const override; 7724e685fdSCraig Topper bool isLegalICmpImmediate(int64_t imm) const override; 7824e685fdSCraig Topper bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, 79664e354dSChandler Carruth int64_t BaseOffset, bool HasBaseReg, 8073156025SCraig Topper int64_t Scale) const override; 8124e685fdSCraig Topper int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, 82bf490d4aSQuentin Colombet int64_t BaseOffset, bool HasBaseReg, 8373156025SCraig Topper int64_t Scale) const override; 8424e685fdSCraig Topper bool isTruncateFree(Type *Ty1, Type *Ty2) const override; 8524e685fdSCraig Topper bool isTypeLegal(Type *Ty) const override; 8624e685fdSCraig Topper unsigned getJumpBufAlignment() const override; 8724e685fdSCraig Topper unsigned getJumpBufSize() const override; 8824e685fdSCraig Topper bool shouldBuildLookupTables() const override; 8924e685fdSCraig Topper bool haveFastSqrt(Type *Ty) const override; 9024e685fdSCraig Topper void getUnrollingPreferences(Loop *L, 9124e685fdSCraig Topper UnrollingPreferences &UP) const override; 92664e354dSChandler Carruth 93664e354dSChandler Carruth /// @} 94664e354dSChandler Carruth 95664e354dSChandler Carruth /// \name Vector TTI Implementations 96664e354dSChandler Carruth /// @{ 97664e354dSChandler Carruth 9824e685fdSCraig Topper unsigned getNumberOfRegisters(bool Vector) const override; 9924e685fdSCraig Topper unsigned 
getMaximumUnrollFactor() const override; 10024e685fdSCraig Topper unsigned getRegisterBitWidth(bool Vector) const override; 10124e685fdSCraig Topper unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, 10273156025SCraig Topper OperandValueKind) const override; 10324e685fdSCraig Topper unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, 10473156025SCraig Topper int Index, Type *SubTp) const override; 10524e685fdSCraig Topper unsigned getCastInstrCost(unsigned Opcode, Type *Dst, 10673156025SCraig Topper Type *Src) const override; 10724e685fdSCraig Topper unsigned getCFInstrCost(unsigned Opcode) const override; 10824e685fdSCraig Topper unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 10973156025SCraig Topper Type *CondTy) const override; 11024e685fdSCraig Topper unsigned getVectorInstrCost(unsigned Opcode, Type *Val, 11173156025SCraig Topper unsigned Index) const override; 11224e685fdSCraig Topper unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 11373156025SCraig Topper unsigned AddressSpace) const override; 11424e685fdSCraig Topper unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, 11524e685fdSCraig Topper ArrayRef<Type*> Tys) const override; 11624e685fdSCraig Topper unsigned getNumberOfParts(Type *Tp) const override; 11724e685fdSCraig Topper unsigned getAddressComputationCost( Type *Ty, bool IsComplex) const override; 11824e685fdSCraig Topper unsigned getReductionCost(unsigned Opcode, Type *Ty, 11973156025SCraig Topper bool IsPairwise) const override; 120664e354dSChandler Carruth 121664e354dSChandler Carruth /// @} 122664e354dSChandler Carruth }; 123664e354dSChandler Carruth 124664e354dSChandler Carruth } 125664e354dSChandler Carruth 126664e354dSChandler Carruth INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", 127664e354dSChandler Carruth "Target independent code generator's TTI", true, true, false) 128664e354dSChandler Carruth char BasicTTI::ID = 0; 129664e354dSChandler Carruth 
130664e354dSChandler Carruth ImmutablePass * 131afc1036fSBill Wendling llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) { 132afc1036fSBill Wendling return new BasicTTI(TM); 133664e354dSChandler Carruth } 134664e354dSChandler Carruth 1358b1e021eSTom Stellard bool BasicTTI::hasBranchDivergence() const { return false; } 136664e354dSChandler Carruth 137664e354dSChandler Carruth bool BasicTTI::isLegalAddImmediate(int64_t imm) const { 138afc1036fSBill Wendling return getTLI()->isLegalAddImmediate(imm); 139664e354dSChandler Carruth } 140664e354dSChandler Carruth 141664e354dSChandler Carruth bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { 142afc1036fSBill Wendling return getTLI()->isLegalICmpImmediate(imm); 143664e354dSChandler Carruth } 144664e354dSChandler Carruth 145664e354dSChandler Carruth bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, 146664e354dSChandler Carruth int64_t BaseOffset, bool HasBaseReg, 147664e354dSChandler Carruth int64_t Scale) const { 14856b31bd9SBenjamin Kramer TargetLoweringBase::AddrMode AM; 149664e354dSChandler Carruth AM.BaseGV = BaseGV; 150664e354dSChandler Carruth AM.BaseOffs = BaseOffset; 151664e354dSChandler Carruth AM.HasBaseReg = HasBaseReg; 152664e354dSChandler Carruth AM.Scale = Scale; 153afc1036fSBill Wendling return getTLI()->isLegalAddressingMode(AM, Ty); 154664e354dSChandler Carruth } 155664e354dSChandler Carruth 156bf490d4aSQuentin Colombet int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, 157bf490d4aSQuentin Colombet int64_t BaseOffset, bool HasBaseReg, 158bf490d4aSQuentin Colombet int64_t Scale) const { 159bf490d4aSQuentin Colombet TargetLoweringBase::AddrMode AM; 160bf490d4aSQuentin Colombet AM.BaseGV = BaseGV; 161bf490d4aSQuentin Colombet AM.BaseOffs = BaseOffset; 162bf490d4aSQuentin Colombet AM.HasBaseReg = HasBaseReg; 163bf490d4aSQuentin Colombet AM.Scale = Scale; 164afc1036fSBill Wendling return getTLI()->getScalingFactorCost(AM, Ty); 165bf490d4aSQuentin Colombet } 
166bf490d4aSQuentin Colombet 167664e354dSChandler Carruth bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const { 168afc1036fSBill Wendling return getTLI()->isTruncateFree(Ty1, Ty2); 169664e354dSChandler Carruth } 170664e354dSChandler Carruth 171664e354dSChandler Carruth bool BasicTTI::isTypeLegal(Type *Ty) const { 172afc1036fSBill Wendling EVT T = getTLI()->getValueType(Ty); 173afc1036fSBill Wendling return getTLI()->isTypeLegal(T); 174664e354dSChandler Carruth } 175664e354dSChandler Carruth 176664e354dSChandler Carruth unsigned BasicTTI::getJumpBufAlignment() const { 177afc1036fSBill Wendling return getTLI()->getJumpBufAlignment(); 178664e354dSChandler Carruth } 179664e354dSChandler Carruth 180664e354dSChandler Carruth unsigned BasicTTI::getJumpBufSize() const { 181afc1036fSBill Wendling return getTLI()->getJumpBufSize(); 182664e354dSChandler Carruth } 183664e354dSChandler Carruth 184664e354dSChandler Carruth bool BasicTTI::shouldBuildLookupTables() const { 185afc1036fSBill Wendling const TargetLoweringBase *TLI = getTLI(); 186664e354dSChandler Carruth return TLI->supportJumpTables() && 187664e354dSChandler Carruth (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || 188664e354dSChandler Carruth TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); 189664e354dSChandler Carruth } 190664e354dSChandler Carruth 19137cd6cfbSRichard Sandiford bool BasicTTI::haveFastSqrt(Type *Ty) const { 19237cd6cfbSRichard Sandiford const TargetLoweringBase *TLI = getTLI(); 19337cd6cfbSRichard Sandiford EVT VT = TLI->getValueType(Ty); 19437cd6cfbSRichard Sandiford return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); 19537cd6cfbSRichard Sandiford } 19637cd6cfbSRichard Sandiford 197*6532c20fSHal Finkel void BasicTTI::getUnrollingPreferences(Loop *L, 198*6532c20fSHal Finkel UnrollingPreferences &UP) const { 199*6532c20fSHal Finkel // This unrolling functionality is target independent, but to provide some 200*6532c20fSHal Finkel // motivation for its 
indended use, for x86: 201*6532c20fSHal Finkel 202*6532c20fSHal Finkel // According to the Intel 64 and IA-32 Architectures Optimization Reference 203*6532c20fSHal Finkel // Manual, Intel Core models and later have a loop stream detector 204*6532c20fSHal Finkel // (and associated uop queue) that can benefit from partial unrolling. 205*6532c20fSHal Finkel // The relevant requirements are: 206*6532c20fSHal Finkel // - The loop must have no more than 4 (8 for Nehalem and later) branches 207*6532c20fSHal Finkel // taken, and none of them may be calls. 208*6532c20fSHal Finkel // - The loop can have no more than 18 (28 for Nehalem and later) uops. 209*6532c20fSHal Finkel 210*6532c20fSHal Finkel // According to the Software Optimization Guide for AMD Family 15h Processors, 211*6532c20fSHal Finkel // models 30h-4fh (Steamroller and later) have a loop predictor and loop 212*6532c20fSHal Finkel // buffer which can benefit from partial unrolling. 213*6532c20fSHal Finkel // The relevant requirements are: 214*6532c20fSHal Finkel // - The loop must have fewer than 16 branches 215*6532c20fSHal Finkel // - The loop must have less than 40 uops in all executed loop branches 216*6532c20fSHal Finkel 217*6532c20fSHal Finkel // The number of taken branches in a loop is hard to estimate here, and 218*6532c20fSHal Finkel // benchmarking has revealed that it is better not to be conservative when 219*6532c20fSHal Finkel // estimating the branch count. As a result, we'll ignore the branch limits 220*6532c20fSHal Finkel // until someone finds a case where it matters in practice. 
221*6532c20fSHal Finkel 222*6532c20fSHal Finkel unsigned MaxOps; 223*6532c20fSHal Finkel const TargetSubtargetInfo *ST = &TM->getSubtarget<TargetSubtargetInfo>(); 224*6532c20fSHal Finkel if (PartialUnrollingThreshold.getNumOccurrences() > 0) 225*6532c20fSHal Finkel MaxOps = PartialUnrollingThreshold; 226*6532c20fSHal Finkel else if (ST->getSchedModel()->LoopMicroOpBufferSize > 0) 227*6532c20fSHal Finkel MaxOps = ST->getSchedModel()->LoopMicroOpBufferSize; 228*6532c20fSHal Finkel else 229*6532c20fSHal Finkel return; 230*6532c20fSHal Finkel 231*6532c20fSHal Finkel // Scan the loop: don't unroll loops with calls. 232*6532c20fSHal Finkel for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); 233*6532c20fSHal Finkel I != E; ++I) { 234*6532c20fSHal Finkel BasicBlock *BB = *I; 235*6532c20fSHal Finkel 236*6532c20fSHal Finkel for (BasicBlock::iterator J = BB->begin(), JE = BB->end(); J != JE; ++J) 237*6532c20fSHal Finkel if (isa<CallInst>(J) || isa<InvokeInst>(J)) { 238*6532c20fSHal Finkel ImmutableCallSite CS(J); 239*6532c20fSHal Finkel if (const Function *F = CS.getCalledFunction()) { 240*6532c20fSHal Finkel if (!TopTTI->isLoweredToCall(F)) 241*6532c20fSHal Finkel continue; 242*6532c20fSHal Finkel } 243*6532c20fSHal Finkel 244*6532c20fSHal Finkel return; 245*6532c20fSHal Finkel } 246*6532c20fSHal Finkel } 247*6532c20fSHal Finkel 248*6532c20fSHal Finkel // Enable runtime and partial unrolling up to the specified size. 249*6532c20fSHal Finkel UP.Partial = UP.Runtime = true; 250*6532c20fSHal Finkel UP.PartialThreshold = UP.PartialOptSizeThreshold = MaxOps; 251*6532c20fSHal Finkel } 2528f2e7005SHal Finkel 253664e354dSChandler Carruth //===----------------------------------------------------------------------===// 254664e354dSChandler Carruth // 255664e354dSChandler Carruth // Calls used by the vectorizers. 
256664e354dSChandler Carruth // 257664e354dSChandler Carruth //===----------------------------------------------------------------------===// 258664e354dSChandler Carruth 259664e354dSChandler Carruth unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert, 260664e354dSChandler Carruth bool Extract) const { 261664e354dSChandler Carruth assert (Ty->isVectorTy() && "Can only scalarize vectors"); 262664e354dSChandler Carruth unsigned Cost = 0; 263664e354dSChandler Carruth 264664e354dSChandler Carruth for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { 265664e354dSChandler Carruth if (Insert) 266664e354dSChandler Carruth Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); 267664e354dSChandler Carruth if (Extract) 268664e354dSChandler Carruth Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); 269664e354dSChandler Carruth } 270664e354dSChandler Carruth 271664e354dSChandler Carruth return Cost; 272664e354dSChandler Carruth } 273664e354dSChandler Carruth 274664e354dSChandler Carruth unsigned BasicTTI::getNumberOfRegisters(bool Vector) const { 275664e354dSChandler Carruth return 1; 276664e354dSChandler Carruth } 277664e354dSChandler Carruth 278b1791a75SNadav Rotem unsigned BasicTTI::getRegisterBitWidth(bool Vector) const { 279b1791a75SNadav Rotem return 32; 280b1791a75SNadav Rotem } 281b1791a75SNadav Rotem 282b696c36fSNadav Rotem unsigned BasicTTI::getMaximumUnrollFactor() const { 283b696c36fSNadav Rotem return 1; 284b696c36fSNadav Rotem } 285b696c36fSNadav Rotem 286b9773871SArnold Schwaighofer unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, 287b9773871SArnold Schwaighofer OperandValueKind, 288b9773871SArnold Schwaighofer OperandValueKind) const { 289664e354dSChandler Carruth // Check if any of the operands are vector operands. 
290afc1036fSBill Wendling const TargetLoweringBase *TLI = getTLI(); 291664e354dSChandler Carruth int ISD = TLI->InstructionOpcodeToISD(Opcode); 292664e354dSChandler Carruth assert(ISD && "Invalid opcode"); 293664e354dSChandler Carruth 294664e354dSChandler Carruth std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); 295664e354dSChandler Carruth 29687a0af6eSNadav Rotem bool IsFloat = Ty->getScalarType()->isFloatingPointTy(); 2970db0690aSNadav Rotem // Assume that floating point arithmetic operations cost twice as much as 2980db0690aSNadav Rotem // integer operations. 29987a0af6eSNadav Rotem unsigned OpCost = (IsFloat ? 2 : 1); 30087a0af6eSNadav Rotem 301664e354dSChandler Carruth if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { 302664e354dSChandler Carruth // The operation is legal. Assume it costs 1. 3030db0690aSNadav Rotem // If the type is split to multiple registers, assume that there is some 304664e354dSChandler Carruth // overhead to this. 305664e354dSChandler Carruth // TODO: Once we have extract/insert subvector cost we need to use them. 306664e354dSChandler Carruth if (LT.first > 1) 30787a0af6eSNadav Rotem return LT.first * 2 * OpCost; 30887a0af6eSNadav Rotem return LT.first * 1 * OpCost; 309664e354dSChandler Carruth } 310664e354dSChandler Carruth 311664e354dSChandler Carruth if (!TLI->isOperationExpand(ISD, LT.second)) { 312664e354dSChandler Carruth // If the operation is custom lowered then assume 313664e354dSChandler Carruth // thare the code is twice as expensive. 31487a0af6eSNadav Rotem return LT.first * 2 * OpCost; 315664e354dSChandler Carruth } 316664e354dSChandler Carruth 317664e354dSChandler Carruth // Else, assume that we need to scalarize this op. 
318664e354dSChandler Carruth if (Ty->isVectorTy()) { 319664e354dSChandler Carruth unsigned Num = Ty->getVectorNumElements(); 320664e354dSChandler Carruth unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType()); 321664e354dSChandler Carruth // return the cost of multiple scalar invocation plus the cost of inserting 322664e354dSChandler Carruth // and extracting the values. 323664e354dSChandler Carruth return getScalarizationOverhead(Ty, true, true) + Num * Cost; 324664e354dSChandler Carruth } 325664e354dSChandler Carruth 326664e354dSChandler Carruth // We don't know anything about this scalar instruction. 32787a0af6eSNadav Rotem return OpCost; 328664e354dSChandler Carruth } 329664e354dSChandler Carruth 330664e354dSChandler Carruth unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, 331664e354dSChandler Carruth Type *SubTp) const { 332664e354dSChandler Carruth return 1; 333664e354dSChandler Carruth } 334664e354dSChandler Carruth 335664e354dSChandler Carruth unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, 336664e354dSChandler Carruth Type *Src) const { 337afc1036fSBill Wendling const TargetLoweringBase *TLI = getTLI(); 338664e354dSChandler Carruth int ISD = TLI->InstructionOpcodeToISD(Opcode); 339664e354dSChandler Carruth assert(ISD && "Invalid opcode"); 340664e354dSChandler Carruth 341664e354dSChandler Carruth std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(Src); 342664e354dSChandler Carruth std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(Dst); 343664e354dSChandler Carruth 344e55aa3c8SNadav Rotem // Check for NOOP conversions. 345e55aa3c8SNadav Rotem if (SrcLT.first == DstLT.first && 346e55aa3c8SNadav Rotem SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { 347664e354dSChandler Carruth 348e55aa3c8SNadav Rotem // Bitcast between types that are legalized to the same type are free. 
349e55aa3c8SNadav Rotem if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) 350664e354dSChandler Carruth return 0; 351e55aa3c8SNadav Rotem } 352664e354dSChandler Carruth 353664e354dSChandler Carruth if (Opcode == Instruction::Trunc && 354664e354dSChandler Carruth TLI->isTruncateFree(SrcLT.second, DstLT.second)) 355664e354dSChandler Carruth return 0; 356664e354dSChandler Carruth 357664e354dSChandler Carruth if (Opcode == Instruction::ZExt && 358664e354dSChandler Carruth TLI->isZExtFree(SrcLT.second, DstLT.second)) 359664e354dSChandler Carruth return 0; 360664e354dSChandler Carruth 361e55aa3c8SNadav Rotem // If the cast is marked as legal (or promote) then assume low cost. 36255312debSHal Finkel if (SrcLT.first == DstLT.first && 36355312debSHal Finkel TLI->isOperationLegalOrPromote(ISD, DstLT.second)) 364e55aa3c8SNadav Rotem return 1; 365e55aa3c8SNadav Rotem 366e55aa3c8SNadav Rotem // Handle scalar conversions. 367e55aa3c8SNadav Rotem if (!Src->isVectorTy() && !Dst->isVectorTy()) { 368e55aa3c8SNadav Rotem 369e55aa3c8SNadav Rotem // Scalar bitcasts are usually free. 370e55aa3c8SNadav Rotem if (Opcode == Instruction::BitCast) 371e55aa3c8SNadav Rotem return 0; 372e55aa3c8SNadav Rotem 373664e354dSChandler Carruth // Just check the op cost. If the operation is legal then assume it costs 1. 374664e354dSChandler Carruth if (!TLI->isOperationExpand(ISD, DstLT.second)) 375664e354dSChandler Carruth return 1; 376664e354dSChandler Carruth 377664e354dSChandler Carruth // Assume that illegal scalar instruction are expensive. 378664e354dSChandler Carruth return 4; 379664e354dSChandler Carruth } 380664e354dSChandler Carruth 381664e354dSChandler Carruth // Check vector-to-vector casts. 382664e354dSChandler Carruth if (Dst->isVectorTy() && Src->isVectorTy()) { 383664e354dSChandler Carruth 384664e354dSChandler Carruth // If the cast is between same-sized registers, then the check is simple. 
385664e354dSChandler Carruth if (SrcLT.first == DstLT.first && 386664e354dSChandler Carruth SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { 387664e354dSChandler Carruth 388664e354dSChandler Carruth // Assume that Zext is done using AND. 389664e354dSChandler Carruth if (Opcode == Instruction::ZExt) 390664e354dSChandler Carruth return 1; 391664e354dSChandler Carruth 392664e354dSChandler Carruth // Assume that sext is done using SHL and SRA. 393664e354dSChandler Carruth if (Opcode == Instruction::SExt) 394664e354dSChandler Carruth return 2; 395664e354dSChandler Carruth 396664e354dSChandler Carruth // Just check the op cost. If the operation is legal then assume it costs 397664e354dSChandler Carruth // 1 and multiply by the type-legalization overhead. 398664e354dSChandler Carruth if (!TLI->isOperationExpand(ISD, DstLT.second)) 399664e354dSChandler Carruth return SrcLT.first * 1; 400664e354dSChandler Carruth } 401664e354dSChandler Carruth 402664e354dSChandler Carruth // If we are converting vectors and the operation is illegal, or 403664e354dSChandler Carruth // if the vectors are legalized to different types, estimate the 404664e354dSChandler Carruth // scalarization costs. 405664e354dSChandler Carruth unsigned Num = Dst->getVectorNumElements(); 406664e354dSChandler Carruth unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(), 407664e354dSChandler Carruth Src->getScalarType()); 408664e354dSChandler Carruth 409664e354dSChandler Carruth // Return the cost of multiple scalar invocation plus the cost of 410664e354dSChandler Carruth // inserting and extracting the values. 411664e354dSChandler Carruth return getScalarizationOverhead(Dst, true, true) + Num * Cost; 412664e354dSChandler Carruth } 413664e354dSChandler Carruth 414664e354dSChandler Carruth // We already handled vector-to-vector and scalar-to-scalar conversions. This 415664e354dSChandler Carruth // is where we handle bitcast between vectors and scalars. 
We need to assume 416664e354dSChandler Carruth // that the conversion is scalarized in one way or another. 417664e354dSChandler Carruth if (Opcode == Instruction::BitCast) 418664e354dSChandler Carruth // Illegal bitcasts are done by storing and loading from a stack slot. 419664e354dSChandler Carruth return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) + 420664e354dSChandler Carruth (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0); 421664e354dSChandler Carruth 422664e354dSChandler Carruth llvm_unreachable("Unhandled cast"); 423664e354dSChandler Carruth } 424664e354dSChandler Carruth 425664e354dSChandler Carruth unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const { 426664e354dSChandler Carruth // Branches are assumed to be predicted. 427664e354dSChandler Carruth return 0; 428664e354dSChandler Carruth } 429664e354dSChandler Carruth 430664e354dSChandler Carruth unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 431664e354dSChandler Carruth Type *CondTy) const { 432afc1036fSBill Wendling const TargetLoweringBase *TLI = getTLI(); 433664e354dSChandler Carruth int ISD = TLI->InstructionOpcodeToISD(Opcode); 434664e354dSChandler Carruth assert(ISD && "Invalid opcode"); 435664e354dSChandler Carruth 436664e354dSChandler Carruth // Selects on vectors are actually vector selects. 437664e354dSChandler Carruth if (ISD == ISD::SELECT) { 438664e354dSChandler Carruth assert(CondTy && "CondTy must exist"); 439664e354dSChandler Carruth if (CondTy->isVectorTy()) 440664e354dSChandler Carruth ISD = ISD::VSELECT; 441664e354dSChandler Carruth } 442664e354dSChandler Carruth 443664e354dSChandler Carruth std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); 444664e354dSChandler Carruth 445664e354dSChandler Carruth if (!TLI->isOperationExpand(ISD, LT.second)) { 446664e354dSChandler Carruth // The operation is legal. Assume it costs 1. Multiply 447664e354dSChandler Carruth // by the type-legalization overhead. 
448664e354dSChandler Carruth return LT.first * 1; 449664e354dSChandler Carruth } 450664e354dSChandler Carruth 451664e354dSChandler Carruth // Otherwise, assume that the cast is scalarized. 452664e354dSChandler Carruth if (ValTy->isVectorTy()) { 453664e354dSChandler Carruth unsigned Num = ValTy->getVectorNumElements(); 454664e354dSChandler Carruth if (CondTy) 455664e354dSChandler Carruth CondTy = CondTy->getScalarType(); 456664e354dSChandler Carruth unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(), 457664e354dSChandler Carruth CondTy); 458664e354dSChandler Carruth 459664e354dSChandler Carruth // Return the cost of multiple scalar invocation plus the cost of inserting 460664e354dSChandler Carruth // and extracting the values. 461664e354dSChandler Carruth return getScalarizationOverhead(ValTy, true, false) + Num * Cost; 462664e354dSChandler Carruth } 463664e354dSChandler Carruth 464664e354dSChandler Carruth // Unknown scalar opcode. 465664e354dSChandler Carruth return 1; 466664e354dSChandler Carruth } 467664e354dSChandler Carruth 468664e354dSChandler Carruth unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val, 469664e354dSChandler Carruth unsigned Index) const { 470ce376c0fSRaul E. Silvera std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Val->getScalarType()); 471ce376c0fSRaul E. Silvera 472ce376c0fSRaul E. Silvera return LT.first; 473664e354dSChandler Carruth } 474664e354dSChandler Carruth 475664e354dSChandler Carruth unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, 476664e354dSChandler Carruth unsigned Alignment, 477664e354dSChandler Carruth unsigned AddressSpace) const { 478664e354dSChandler Carruth assert(!Src->isVoidTy() && "Invalid type"); 479afc1036fSBill Wendling std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Src); 480664e354dSChandler Carruth 4816fd19ab3SHal Finkel // Assuming that all loads of legal types cost 1. 
4826fd19ab3SHal Finkel unsigned Cost = LT.first; 4836fd19ab3SHal Finkel 4846fd19ab3SHal Finkel if (Src->isVectorTy() && 4856fd19ab3SHal Finkel Src->getPrimitiveSizeInBits() < LT.second.getSizeInBits()) { 4866fd19ab3SHal Finkel // This is a vector load that legalizes to a larger type than the vector 4876fd19ab3SHal Finkel // itself. Unless the corresponding extending load or truncating store is 4886fd19ab3SHal Finkel // legal, then this will scalarize. 48956bf297eSHal Finkel TargetLowering::LegalizeAction LA = TargetLowering::Expand; 49056bf297eSHal Finkel EVT MemVT = getTLI()->getValueType(Src, true); 49156bf297eSHal Finkel if (MemVT.isSimple() && MemVT != MVT::Other) { 4926fd19ab3SHal Finkel if (Opcode == Instruction::Store) 49356bf297eSHal Finkel LA = getTLI()->getTruncStoreAction(LT.second, MemVT.getSimpleVT()); 4946fd19ab3SHal Finkel else 49556bf297eSHal Finkel LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, MemVT.getSimpleVT()); 49656bf297eSHal Finkel } 4976fd19ab3SHal Finkel 4986fd19ab3SHal Finkel if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) { 4996fd19ab3SHal Finkel // This is a vector load/store for some illegal type that is scalarized. 5006fd19ab3SHal Finkel // We must account for the cost of building or decomposing the vector. 5016fd19ab3SHal Finkel Cost += getScalarizationOverhead(Src, Opcode != Instruction::Store, 5026fd19ab3SHal Finkel Opcode == Instruction::Store); 5036fd19ab3SHal Finkel } 5046fd19ab3SHal Finkel } 5056fd19ab3SHal Finkel 5066fd19ab3SHal Finkel return Cost; 507664e354dSChandler Carruth } 508664e354dSChandler Carruth 509f7cfac7aSBenjamin Kramer unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, 510664e354dSChandler Carruth ArrayRef<Type *> Tys) const { 511f7cfac7aSBenjamin Kramer unsigned ISD = 0; 512f7cfac7aSBenjamin Kramer switch (IID) { 513f7cfac7aSBenjamin Kramer default: { 514f7cfac7aSBenjamin Kramer // Assume that we need to scalarize this intrinsic. 
515664e354dSChandler Carruth unsigned ScalarizationCost = 0; 516664e354dSChandler Carruth unsigned ScalarCalls = 1; 517664e354dSChandler Carruth if (RetTy->isVectorTy()) { 518664e354dSChandler Carruth ScalarizationCost = getScalarizationOverhead(RetTy, true, false); 519664e354dSChandler Carruth ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); 520664e354dSChandler Carruth } 521664e354dSChandler Carruth for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { 522664e354dSChandler Carruth if (Tys[i]->isVectorTy()) { 523664e354dSChandler Carruth ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); 524664e354dSChandler Carruth ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); 525664e354dSChandler Carruth } 526664e354dSChandler Carruth } 527f7cfac7aSBenjamin Kramer 528664e354dSChandler Carruth return ScalarCalls + ScalarizationCost; 529664e354dSChandler Carruth } 530f7cfac7aSBenjamin Kramer // Look for intrinsics that can be lowered directly or turned into a scalar 531f7cfac7aSBenjamin Kramer // intrinsic call. 
532f7cfac7aSBenjamin Kramer case Intrinsic::sqrt: ISD = ISD::FSQRT; break; 533f7cfac7aSBenjamin Kramer case Intrinsic::sin: ISD = ISD::FSIN; break; 534f7cfac7aSBenjamin Kramer case Intrinsic::cos: ISD = ISD::FCOS; break; 535f7cfac7aSBenjamin Kramer case Intrinsic::exp: ISD = ISD::FEXP; break; 536f7cfac7aSBenjamin Kramer case Intrinsic::exp2: ISD = ISD::FEXP2; break; 537f7cfac7aSBenjamin Kramer case Intrinsic::log: ISD = ISD::FLOG; break; 538f7cfac7aSBenjamin Kramer case Intrinsic::log10: ISD = ISD::FLOG10; break; 539f7cfac7aSBenjamin Kramer case Intrinsic::log2: ISD = ISD::FLOG2; break; 540f7cfac7aSBenjamin Kramer case Intrinsic::fabs: ISD = ISD::FABS; break; 5410c5c01aaSHal Finkel case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; 542f7cfac7aSBenjamin Kramer case Intrinsic::floor: ISD = ISD::FFLOOR; break; 543f7cfac7aSBenjamin Kramer case Intrinsic::ceil: ISD = ISD::FCEIL; break; 544f7cfac7aSBenjamin Kramer case Intrinsic::trunc: ISD = ISD::FTRUNC; break; 545ec474f28SHal Finkel case Intrinsic::nearbyint: 546ec474f28SHal Finkel ISD = ISD::FNEARBYINT; break; 547f7cfac7aSBenjamin Kramer case Intrinsic::rint: ISD = ISD::FRINT; break; 548171817eeSHal Finkel case Intrinsic::round: ISD = ISD::FROUND; break; 549f7cfac7aSBenjamin Kramer case Intrinsic::pow: ISD = ISD::FPOW; break; 550f7cfac7aSBenjamin Kramer case Intrinsic::fma: ISD = ISD::FMA; break; 5511625bfccSBenjamin Kramer case Intrinsic::fmuladd: ISD = ISD::FMA; break; 552a7cd6bf3SArnold Schwaighofer case Intrinsic::lifetime_start: 553a7cd6bf3SArnold Schwaighofer case Intrinsic::lifetime_end: 554a7cd6bf3SArnold Schwaighofer return 0; 555f7cfac7aSBenjamin Kramer } 556f7cfac7aSBenjamin Kramer 557afc1036fSBill Wendling const TargetLoweringBase *TLI = getTLI(); 558f7cfac7aSBenjamin Kramer std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(RetTy); 559f7cfac7aSBenjamin Kramer 560f7cfac7aSBenjamin Kramer if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { 561f7cfac7aSBenjamin Kramer // The operation is legal. 
Assume it costs 1. 562f7cfac7aSBenjamin Kramer // If the type is split to multiple registers, assume that thre is some 563f7cfac7aSBenjamin Kramer // overhead to this. 564f7cfac7aSBenjamin Kramer // TODO: Once we have extract/insert subvector cost we need to use them. 565f7cfac7aSBenjamin Kramer if (LT.first > 1) 566f7cfac7aSBenjamin Kramer return LT.first * 2; 567f7cfac7aSBenjamin Kramer return LT.first * 1; 568f7cfac7aSBenjamin Kramer } 569f7cfac7aSBenjamin Kramer 570f7cfac7aSBenjamin Kramer if (!TLI->isOperationExpand(ISD, LT.second)) { 571f7cfac7aSBenjamin Kramer // If the operation is custom lowered then assume 572f7cfac7aSBenjamin Kramer // thare the code is twice as expensive. 573f7cfac7aSBenjamin Kramer return LT.first * 2; 574f7cfac7aSBenjamin Kramer } 575f7cfac7aSBenjamin Kramer 5761625bfccSBenjamin Kramer // If we can't lower fmuladd into an FMA estimate the cost as a floating 5771625bfccSBenjamin Kramer // point mul followed by an add. 5781625bfccSBenjamin Kramer if (IID == Intrinsic::fmuladd) 5791625bfccSBenjamin Kramer return TopTTI->getArithmeticInstrCost(BinaryOperator::FMul, RetTy) + 5801625bfccSBenjamin Kramer TopTTI->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy); 5811625bfccSBenjamin Kramer 582f7cfac7aSBenjamin Kramer // Else, assume that we need to scalarize this intrinsic. For math builtins 583f7cfac7aSBenjamin Kramer // this will emit a costly libcall, adding call overhead and spills. Make it 584f7cfac7aSBenjamin Kramer // very expensive. 585f7cfac7aSBenjamin Kramer if (RetTy->isVectorTy()) { 586f7cfac7aSBenjamin Kramer unsigned Num = RetTy->getVectorNumElements(); 587f7cfac7aSBenjamin Kramer unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(), 588f7cfac7aSBenjamin Kramer Tys); 589f7cfac7aSBenjamin Kramer return 10 * Cost * Num; 590f7cfac7aSBenjamin Kramer } 591f7cfac7aSBenjamin Kramer 592f7cfac7aSBenjamin Kramer // This is going to be turned into a library call, make it expensive. 
593f7cfac7aSBenjamin Kramer return 10; 594f7cfac7aSBenjamin Kramer } 595664e354dSChandler Carruth 596664e354dSChandler Carruth unsigned BasicTTI::getNumberOfParts(Type *Tp) const { 597afc1036fSBill Wendling std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(Tp); 598664e354dSChandler Carruth return LT.first; 599664e354dSChandler Carruth } 600594fa2dcSArnold Schwaighofer 6019da9a43aSArnold Schwaighofer unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { 602594fa2dcSArnold Schwaighofer return 0; 603594fa2dcSArnold Schwaighofer } 604cae8735aSArnold Schwaighofer 605cae8735aSArnold Schwaighofer unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty, 606cae8735aSArnold Schwaighofer bool IsPairwise) const { 607cae8735aSArnold Schwaighofer assert(Ty->isVectorTy() && "Expect a vector type"); 608cae8735aSArnold Schwaighofer unsigned NumVecElts = Ty->getVectorNumElements(); 609cae8735aSArnold Schwaighofer unsigned NumReduxLevels = Log2_32(NumVecElts); 610cae8735aSArnold Schwaighofer unsigned ArithCost = NumReduxLevels * 611cae8735aSArnold Schwaighofer TopTTI->getArithmeticInstrCost(Opcode, Ty); 612cae8735aSArnold Schwaighofer // Assume the pairwise shuffles add a cost. 613cae8735aSArnold Schwaighofer unsigned ShuffleCost = 614cae8735aSArnold Schwaighofer NumReduxLevels * (IsPairwise + 1) * 615cae8735aSArnold Schwaighofer TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty); 616cae8735aSArnold Schwaighofer return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); 617cae8735aSArnold Schwaighofer } 618