1 //===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This file provides helpers for the implementation of 11 /// a TargetTransformInfo-conforming class. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H 16 #define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H 17 18 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 19 #include "llvm/Analysis/TargetTransformInfo.h" 20 #include "llvm/Analysis/VectorUtils.h" 21 #include "llvm/IR/CallSite.h" 22 #include "llvm/IR/DataLayout.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/GetElementPtrTypeIterator.h" 25 #include "llvm/IR/Operator.h" 26 #include "llvm/IR/Type.h" 27 28 namespace llvm { 29 30 /// Base class for use as a mix-in that aids implementing 31 /// a TargetTransformInfo-compatible class. 32 class TargetTransformInfoImplBase { 33 protected: 34 typedef TargetTransformInfo TTI; 35 36 const DataLayout &DL; 37 TargetTransformInfoImplBase(const DataLayout & DL)38 explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {} 39 40 public: 41 // Provide value semantics. MSVC requires that we spell all of these out. TargetTransformInfoImplBase(const TargetTransformInfoImplBase & Arg)42 TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) 43 : DL(Arg.DL) {} TargetTransformInfoImplBase(TargetTransformInfoImplBase && Arg)44 TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {} 45 getDataLayout()46 const DataLayout &getDataLayout() const { return DL; } 47 getOperationCost(unsigned Opcode,Type * Ty,Type * OpTy)48 unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) { 49 switch (Opcode) { 50 default: 51 // By default, just classify everything as 'basic'. 52 return TTI::TCC_Basic; 53 54 case Instruction::GetElementPtr: 55 llvm_unreachable("Use getGEPCost for GEP operations!"); 56 57 case Instruction::BitCast: 58 assert(OpTy && "Cast instructions must provide the operand type"); 59 if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy())) 60 // Identity and pointer-to-pointer casts are free. 61 return TTI::TCC_Free; 62 63 // Otherwise, the default basic cost is used. 64 return TTI::TCC_Basic; 65 66 case Instruction::FDiv: 67 case Instruction::FRem: 68 case Instruction::SDiv: 69 case Instruction::SRem: 70 case Instruction::UDiv: 71 case Instruction::URem: 72 return TTI::TCC_Expensive; 73 74 case Instruction::IntToPtr: { 75 // An inttoptr cast is free so long as the input is a legal integer type 76 // which doesn't contain values outside the range of a pointer. 77 unsigned OpSize = OpTy->getScalarSizeInBits(); 78 if (DL.isLegalInteger(OpSize) && 79 OpSize <= DL.getPointerTypeSizeInBits(Ty)) 80 return TTI::TCC_Free; 81 82 // Otherwise it's not a no-op. 83 return TTI::TCC_Basic; 84 } 85 case Instruction::PtrToInt: { 86 // A ptrtoint cast is free so long as the result is large enough to store 87 // the pointer, and a legal integer type. 88 unsigned DestSize = Ty->getScalarSizeInBits(); 89 if (DL.isLegalInteger(DestSize) && 90 DestSize >= DL.getPointerTypeSizeInBits(OpTy)) 91 return TTI::TCC_Free; 92 93 // Otherwise it's not a no-op. 94 return TTI::TCC_Basic; 95 } 96 case Instruction::Trunc: 97 // trunc to a native type is free (assuming the target has compare and 98 // shift-right of the same width). 99 if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty))) 100 return TTI::TCC_Free; 101 102 return TTI::TCC_Basic; 103 } 104 } 105 getGEPCost(Type * PointeeType,const Value * Ptr,ArrayRef<const Value * > Operands)106 int getGEPCost(Type *PointeeType, const Value *Ptr, 107 ArrayRef<const Value *> Operands) { 108 // In the basic model, we just assume that all-constant GEPs will be folded 109 // into their uses via addressing modes. 110 for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) 111 if (!isa<Constant>(Operands[Idx])) 112 return TTI::TCC_Basic; 113 114 return TTI::TCC_Free; 115 } 116 getEstimatedNumberOfCaseClusters(const SwitchInst & SI,unsigned & JTSize)117 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, 118 unsigned &JTSize) { 119 JTSize = 0; 120 return SI.getNumCases(); 121 } 122 getExtCost(const Instruction * I,const Value * Src)123 int getExtCost(const Instruction *I, const Value *Src) { 124 return TTI::TCC_Basic; 125 } 126 getCallCost(FunctionType * FTy,int NumArgs)127 unsigned getCallCost(FunctionType *FTy, int NumArgs) { 128 assert(FTy && "FunctionType must be provided to this routine."); 129 130 // The target-independent implementation just measures the size of the 131 // function by approximating that each argument will take on average one 132 // instruction to prepare. 133 134 if (NumArgs < 0) 135 // Set the argument number to the number of explicit arguments in the 136 // function. 137 NumArgs = FTy->getNumParams(); 138 139 return TTI::TCC_Basic * (NumArgs + 1); 140 } 141 getInliningThresholdMultiplier()142 unsigned getInliningThresholdMultiplier() { return 1; } 143 getIntrinsicCost(Intrinsic::ID IID,Type * RetTy,ArrayRef<Type * > ParamTys)144 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 145 ArrayRef<Type *> ParamTys) { 146 switch (IID) { 147 default: 148 // Intrinsics rarely (if ever) have normal argument setup constraints. 149 // Model them as having a basic instruction cost. 150 // FIXME: This is wrong for libc intrinsics. 151 return TTI::TCC_Basic; 152 153 case Intrinsic::annotation: 154 case Intrinsic::assume: 155 case Intrinsic::sideeffect: 156 case Intrinsic::dbg_declare: 157 case Intrinsic::dbg_value: 158 case Intrinsic::dbg_label: 159 case Intrinsic::invariant_start: 160 case Intrinsic::invariant_end: 161 case Intrinsic::launder_invariant_group: 162 case Intrinsic::strip_invariant_group: 163 case Intrinsic::is_constant: 164 case Intrinsic::lifetime_start: 165 case Intrinsic::lifetime_end: 166 case Intrinsic::objectsize: 167 case Intrinsic::ptr_annotation: 168 case Intrinsic::var_annotation: 169 case Intrinsic::experimental_gc_result: 170 case Intrinsic::experimental_gc_relocate: 171 case Intrinsic::coro_alloc: 172 case Intrinsic::coro_begin: 173 case Intrinsic::coro_free: 174 case Intrinsic::coro_end: 175 case Intrinsic::coro_frame: 176 case Intrinsic::coro_size: 177 case Intrinsic::coro_suspend: 178 case Intrinsic::coro_param: 179 case Intrinsic::coro_subfn_addr: 180 // These intrinsics don't actually represent code after lowering. 181 return TTI::TCC_Free; 182 } 183 } 184 hasBranchDivergence()185 bool hasBranchDivergence() { return false; } 186 isSourceOfDivergence(const Value * V)187 bool isSourceOfDivergence(const Value *V) { return false; } 188 isAlwaysUniform(const Value * V)189 bool isAlwaysUniform(const Value *V) { return false; } 190 getFlatAddressSpace()191 unsigned getFlatAddressSpace () { 192 return -1; 193 } 194 isLoweredToCall(const Function * F)195 bool isLoweredToCall(const Function *F) { 196 assert(F && "A concrete function must be provided to this routine."); 197 198 // FIXME: These should almost certainly not be handled here, and instead 199 // handled with the help of TLI or the target itself. This was largely 200 // ported from existing analysis heuristics here so that such refactorings 201 // can take place in the future. 202 203 if (F->isIntrinsic()) 204 return false; 205 206 if (F->hasLocalLinkage() || !F->hasName()) 207 return true; 208 209 StringRef Name = F->getName(); 210 211 // These will all likely lower to a single selection DAG node. 212 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || 213 Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || 214 Name == "fmin" || Name == "fminf" || Name == "fminl" || 215 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" || 216 Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || 217 Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") 218 return false; 219 220 // These are all likely to be optimized into something smaller. 221 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || 222 Name == "exp2l" || Name == "exp2f" || Name == "floor" || 223 Name == "floorf" || Name == "ceil" || Name == "round" || 224 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" || 225 Name == "llabs") 226 return false; 227 228 return true; 229 } 230 getUnrollingPreferences(Loop *,ScalarEvolution &,TTI::UnrollingPreferences &)231 void getUnrollingPreferences(Loop *, ScalarEvolution &, 232 TTI::UnrollingPreferences &) {} 233 isLegalAddImmediate(int64_t Imm)234 bool isLegalAddImmediate(int64_t Imm) { return false; } 235 isLegalICmpImmediate(int64_t Imm)236 bool isLegalICmpImmediate(int64_t Imm) { return false; } 237 238 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 239 bool HasBaseReg, int64_t Scale, 240 unsigned AddrSpace, Instruction *I = nullptr) { 241 // Guess that only reg and reg+reg addressing is allowed. This heuristic is 242 // taken from the implementation of LSR. 243 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); 244 } 245 isLSRCostLess(TTI::LSRCost & C1,TTI::LSRCost & C2)246 bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) { 247 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, 248 C1.ScaleCost, C1.ImmCost, C1.SetupCost) < 249 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, 250 C2.ScaleCost, C2.ImmCost, C2.SetupCost); 251 } 252 canMacroFuseCmp()253 bool canMacroFuseCmp() { return false; } 254 shouldFavorPostInc()255 bool shouldFavorPostInc() const { return false; } 256 isLegalMaskedStore(Type * DataType)257 bool isLegalMaskedStore(Type *DataType) { return false; } 258 isLegalMaskedLoad(Type * DataType)259 bool isLegalMaskedLoad(Type *DataType) { return false; } 260 isLegalMaskedScatter(Type * DataType)261 bool isLegalMaskedScatter(Type *DataType) { return false; } 262 isLegalMaskedGather(Type * DataType)263 bool isLegalMaskedGather(Type *DataType) { return false; } 264 hasDivRemOp(Type * DataType,bool IsSigned)265 bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; } 266 hasVolatileVariant(Instruction * I,unsigned AddrSpace)267 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; } 268 prefersVectorizedAddressing()269 bool prefersVectorizedAddressing() { return true; } 270 getScalingFactorCost(Type * Ty,GlobalValue * BaseGV,int64_t BaseOffset,bool HasBaseReg,int64_t Scale,unsigned AddrSpace)271 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 272 bool HasBaseReg, int64_t Scale, unsigned AddrSpace) { 273 // Guess that all legal addressing mode are free. 274 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, 275 Scale, AddrSpace)) 276 return 0; 277 return -1; 278 } 279 LSRWithInstrQueries()280 bool LSRWithInstrQueries() { return false; } 281 isTruncateFree(Type * Ty1,Type * Ty2)282 bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; } 283 isProfitableToHoist(Instruction * I)284 bool isProfitableToHoist(Instruction *I) { return true; } 285 useAA()286 bool useAA() { return false; } 287 isTypeLegal(Type * Ty)288 bool isTypeLegal(Type *Ty) { return false; } 289 getJumpBufAlignment()290 unsigned getJumpBufAlignment() { return 0; } 291 getJumpBufSize()292 unsigned getJumpBufSize() { return 0; } 293 shouldBuildLookupTables()294 bool shouldBuildLookupTables() { return true; } shouldBuildLookupTablesForConstant(Constant * C)295 bool shouldBuildLookupTablesForConstant(Constant *C) { return true; } 296 useColdCCForColdCall(Function & F)297 bool useColdCCForColdCall(Function &F) { return false; } 298 getScalarizationOverhead(Type * Ty,bool Insert,bool Extract)299 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) { 300 return 0; 301 } 302 getOperandsScalarizationOverhead(ArrayRef<const Value * > Args,unsigned VF)303 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, 304 unsigned VF) { return 0; } 305 supportsEfficientVectorElementLoadStore()306 bool supportsEfficientVectorElementLoadStore() { return false; } 307 enableAggressiveInterleaving(bool LoopHasReductions)308 bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; } 309 enableMemCmpExpansion(bool IsZeroCmp)310 const TTI::MemCmpExpansionOptions *enableMemCmpExpansion( 311 bool IsZeroCmp) const { 312 return nullptr; 313 } 314 enableInterleavedAccessVectorization()315 bool enableInterleavedAccessVectorization() { return false; } 316 enableMaskedInterleavedAccessVectorization()317 bool enableMaskedInterleavedAccessVectorization() { return false; } 318 isFPVectorizationPotentiallyUnsafe()319 bool isFPVectorizationPotentiallyUnsafe() { return false; } 320 allowsMisalignedMemoryAccesses(LLVMContext & Context,unsigned BitWidth,unsigned AddressSpace,unsigned Alignment,bool * Fast)321 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, 322 unsigned BitWidth, 323 unsigned AddressSpace, 324 unsigned Alignment, 325 bool *Fast) { return false; } 326 getPopcntSupport(unsigned IntTyWidthInBit)327 TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) { 328 return TTI::PSK_Software; 329 } 330 haveFastSqrt(Type * Ty)331 bool haveFastSqrt(Type *Ty) { return false; } 332 isFCmpOrdCheaperThanFCmpZero(Type * Ty)333 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; } 334 getFPOpCost(Type * Ty)335 unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; } 336 getIntImmCodeSizeCost(unsigned Opcode,unsigned Idx,const APInt & Imm,Type * Ty)337 int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm, 338 Type *Ty) { 339 return 0; 340 } 341 getIntImmCost(const APInt & Imm,Type * Ty)342 unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; } 343 getIntImmCost(unsigned Opcode,unsigned Idx,const APInt & Imm,Type * Ty)344 unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm, 345 Type *Ty) { 346 return TTI::TCC_Free; 347 } 348 getIntImmCost(Intrinsic::ID IID,unsigned Idx,const APInt & Imm,Type * Ty)349 unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm, 350 Type *Ty) { 351 return TTI::TCC_Free; 352 } 353 getNumberOfRegisters(bool Vector)354 unsigned getNumberOfRegisters(bool Vector) { return 8; } 355 getRegisterBitWidth(bool Vector)356 unsigned getRegisterBitWidth(bool Vector) const { return 32; } 357 getMinVectorRegisterBitWidth()358 unsigned getMinVectorRegisterBitWidth() { return 128; } 359 shouldMaximizeVectorBandwidth(bool OptSize)360 bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; } 361 getMinimumVF(unsigned ElemWidth)362 unsigned getMinimumVF(unsigned ElemWidth) const { return 0; } 363 364 bool shouldConsiderAddressTypePromotion(const Instruction & I,bool & AllowPromotionWithoutCommonHeader)365 shouldConsiderAddressTypePromotion(const Instruction &I, 366 bool &AllowPromotionWithoutCommonHeader) { 367 AllowPromotionWithoutCommonHeader = false; 368 return false; 369 } 370 getCacheLineSize()371 unsigned getCacheLineSize() { return 0; } 372 getCacheSize(TargetTransformInfo::CacheLevel Level)373 llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) { 374 switch (Level) { 375 case TargetTransformInfo::CacheLevel::L1D: 376 LLVM_FALLTHROUGH; 377 case TargetTransformInfo::CacheLevel::L2D: 378 return llvm::Optional<unsigned>(); 379 } 380 381 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); 382 } 383 getCacheAssociativity(TargetTransformInfo::CacheLevel Level)384 llvm::Optional<unsigned> getCacheAssociativity( 385 TargetTransformInfo::CacheLevel Level) { 386 switch (Level) { 387 case TargetTransformInfo::CacheLevel::L1D: 388 LLVM_FALLTHROUGH; 389 case TargetTransformInfo::CacheLevel::L2D: 390 return llvm::Optional<unsigned>(); 391 } 392 393 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); 394 } 395 getPrefetchDistance()396 unsigned getPrefetchDistance() { return 0; } 397 getMinPrefetchStride()398 unsigned getMinPrefetchStride() { return 1; } 399 getMaxPrefetchIterationsAhead()400 unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; } 401 getMaxInterleaveFactor(unsigned VF)402 unsigned getMaxInterleaveFactor(unsigned VF) { return 1; } 403 getArithmeticInstrCost(unsigned Opcode,Type * Ty,TTI::OperandValueKind Opd1Info,TTI::OperandValueKind Opd2Info,TTI::OperandValueProperties Opd1PropInfo,TTI::OperandValueProperties Opd2PropInfo,ArrayRef<const Value * > Args)404 unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, 405 TTI::OperandValueKind Opd1Info, 406 TTI::OperandValueKind Opd2Info, 407 TTI::OperandValueProperties Opd1PropInfo, 408 TTI::OperandValueProperties Opd2PropInfo, 409 ArrayRef<const Value *> Args) { 410 return 1; 411 } 412 getShuffleCost(TTI::ShuffleKind Kind,Type * Ty,int Index,Type * SubTp)413 unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index, 414 Type *SubTp) { 415 return 1; 416 } 417 getCastInstrCost(unsigned Opcode,Type * Dst,Type * Src,const Instruction * I)418 unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, 419 const Instruction *I) { return 1; } 420 getExtractWithExtendCost(unsigned Opcode,Type * Dst,VectorType * VecTy,unsigned Index)421 unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst, 422 VectorType *VecTy, unsigned Index) { 423 return 1; 424 } 425 getCFInstrCost(unsigned Opcode)426 unsigned getCFInstrCost(unsigned Opcode) { return 1; } 427 getCmpSelInstrCost(unsigned Opcode,Type * ValTy,Type * CondTy,const Instruction * I)428 unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, 429 const Instruction *I) { 430 return 1; 431 } 432 getVectorInstrCost(unsigned Opcode,Type * Val,unsigned Index)433 unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) { 434 return 1; 435 } 436 getMemoryOpCost(unsigned Opcode,Type * Src,unsigned Alignment,unsigned AddressSpace,const Instruction * I)437 unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 438 unsigned AddressSpace, const Instruction *I) { 439 return 1; 440 } 441 getMaskedMemoryOpCost(unsigned Opcode,Type * Src,unsigned Alignment,unsigned AddressSpace)442 unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 443 unsigned AddressSpace) { 444 return 1; 445 } 446 getGatherScatterOpCost(unsigned Opcode,Type * DataTy,Value * Ptr,bool VariableMask,unsigned Alignment)447 unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr, 448 bool VariableMask, 449 unsigned Alignment) { 450 return 1; 451 } 452 453 unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, 454 unsigned Factor, 455 ArrayRef<unsigned> Indices, 456 unsigned Alignment, unsigned AddressSpace, 457 bool UseMaskForCond = false, 458 bool UseMaskForGaps = false) { 459 return 1; 460 } 461 getIntrinsicInstrCost(Intrinsic::ID ID,Type * RetTy,ArrayRef<Type * > Tys,FastMathFlags FMF,unsigned ScalarizationCostPassed)462 unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 463 ArrayRef<Type *> Tys, FastMathFlags FMF, 464 unsigned ScalarizationCostPassed) { 465 return 1; 466 } getIntrinsicInstrCost(Intrinsic::ID ID,Type * RetTy,ArrayRef<Value * > Args,FastMathFlags FMF,unsigned VF)467 unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, 468 ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) { 469 return 1; 470 } 471 getCallInstrCost(Function * F,Type * RetTy,ArrayRef<Type * > Tys)472 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) { 473 return 1; 474 } 475 getNumberOfParts(Type * Tp)476 unsigned getNumberOfParts(Type *Tp) { return 0; } 477 getAddressComputationCost(Type * Tp,ScalarEvolution *,const SCEV *)478 unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *, 479 const SCEV *) { 480 return 0; 481 } 482 getArithmeticReductionCost(unsigned,Type *,bool)483 unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; } 484 getMinMaxReductionCost(Type *,Type *,bool,bool)485 unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; } 486 getCostOfKeepingLiveOverCall(ArrayRef<Type * > Tys)487 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; } 488 getTgtMemIntrinsic(IntrinsicInst * Inst,MemIntrinsicInfo & Info)489 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) { 490 return false; 491 } 492 getAtomicMemIntrinsicMaxElementSize()493 unsigned getAtomicMemIntrinsicMaxElementSize() const { 494 // Note for overrides: You must ensure for all element unordered-atomic 495 // memory intrinsics that all power-of-2 element sizes up to, and 496 // including, the return value of this method have a corresponding 497 // runtime lib call. These runtime lib call definitions can be found 498 // in RuntimeLibcalls.h 499 return 0; 500 } 501 getOrCreateResultFromMemIntrinsic(IntrinsicInst * Inst,Type * ExpectedType)502 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 503 Type *ExpectedType) { 504 return nullptr; 505 } 506 getMemcpyLoopLoweringType(LLVMContext & Context,Value * Length,unsigned SrcAlign,unsigned DestAlign)507 Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, 508 unsigned SrcAlign, unsigned DestAlign) const { 509 return Type::getInt8Ty(Context); 510 } 511 getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type * > & OpsOut,LLVMContext & Context,unsigned RemainingBytes,unsigned SrcAlign,unsigned DestAlign)512 void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut, 513 LLVMContext &Context, 514 unsigned RemainingBytes, 515 unsigned SrcAlign, 516 unsigned DestAlign) const { 517 for (unsigned i = 0; i != RemainingBytes; ++i) 518 OpsOut.push_back(Type::getInt8Ty(Context)); 519 } 520 areInlineCompatible(const Function * Caller,const Function * Callee)521 bool areInlineCompatible(const Function *Caller, 522 const Function *Callee) const { 523 return (Caller->getFnAttribute("target-cpu") == 524 Callee->getFnAttribute("target-cpu")) && 525 (Caller->getFnAttribute("target-features") == 526 Callee->getFnAttribute("target-features")); 527 } 528 areFunctionArgsABICompatible(const Function * Caller,const Function * Callee,SmallPtrSetImpl<Argument * > & Args)529 bool areFunctionArgsABICompatible(const Function *Caller, const Function *Callee, 530 SmallPtrSetImpl<Argument *> &Args) const { 531 return (Caller->getFnAttribute("target-cpu") == 532 Callee->getFnAttribute("target-cpu")) && 533 (Caller->getFnAttribute("target-features") == 534 Callee->getFnAttribute("target-features")); 535 } 536 isIndexedLoadLegal(TTI::MemIndexedMode Mode,Type * Ty,const DataLayout & DL)537 bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty, 538 const DataLayout &DL) const { 539 return false; 540 } 541 isIndexedStoreLegal(TTI::MemIndexedMode Mode,Type * Ty,const DataLayout & DL)542 bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty, 543 const DataLayout &DL) const { 544 return false; 545 } 546 getLoadStoreVecRegBitWidth(unsigned AddrSpace)547 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; } 548 isLegalToVectorizeLoad(LoadInst * LI)549 bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; } 550 isLegalToVectorizeStore(StoreInst * SI)551 bool isLegalToVectorizeStore(StoreInst *SI) const { return true; } 552 isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,unsigned Alignment,unsigned AddrSpace)553 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, 554 unsigned Alignment, 555 unsigned AddrSpace) const { 556 return true; 557 } 558 isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,unsigned Alignment,unsigned AddrSpace)559 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, 560 unsigned Alignment, 561 unsigned AddrSpace) const { 562 return true; 563 } 564 getLoadVectorFactor(unsigned VF,unsigned LoadSize,unsigned ChainSizeInBytes,VectorType * VecTy)565 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, 566 unsigned ChainSizeInBytes, 567 VectorType *VecTy) const { 568 return VF; 569 } 570 getStoreVectorFactor(unsigned VF,unsigned StoreSize,unsigned ChainSizeInBytes,VectorType * VecTy)571 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, 572 unsigned ChainSizeInBytes, 573 VectorType *VecTy) const { 574 return VF; 575 } 576 useReductionIntrinsic(unsigned Opcode,Type * Ty,TTI::ReductionFlags Flags)577 bool useReductionIntrinsic(unsigned Opcode, Type *Ty, 578 TTI::ReductionFlags Flags) const { 579 return false; 580 } 581 shouldExpandReduction(const IntrinsicInst * II)582 bool shouldExpandReduction(const IntrinsicInst *II) const { 583 return true; 584 } 585 586 protected: 587 // Obtain the minimum required size to hold the value (without the sign) 588 // In case of a vector it returns the min required size for one element. minRequiredElementSize(const Value * Val,bool & isSigned)589 unsigned minRequiredElementSize(const Value* Val, bool &isSigned) { 590 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) { 591 const auto* VectorValue = cast<Constant>(Val); 592 593 // In case of a vector need to pick the max between the min 594 // required size for each element 595 auto *VT = cast<VectorType>(Val->getType()); 596 597 // Assume unsigned elements 598 isSigned = false; 599 600 // The max required size is the total vector width divided by num 601 // of elements in the vector 602 unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements(); 603 604 unsigned MinRequiredSize = 0; 605 for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) { 606 if (auto* IntElement = 607 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) { 608 bool signedElement = IntElement->getValue().isNegative(); 609 // Get the element min required size. 610 unsigned ElementMinRequiredSize = 611 IntElement->getValue().getMinSignedBits() - 1; 612 // In case one element is signed then all the vector is signed. 613 isSigned |= signedElement; 614 // Save the max required bit size between all the elements. 615 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize); 616 } 617 else { 618 // not an int constant element 619 return MaxRequiredSize; 620 } 621 } 622 return MinRequiredSize; 623 } 624 625 if (const auto* CI = dyn_cast<ConstantInt>(Val)) { 626 isSigned = CI->getValue().isNegative(); 627 return CI->getValue().getMinSignedBits() - 1; 628 } 629 630 if (const auto* Cast = dyn_cast<SExtInst>(Val)) { 631 isSigned = true; 632 return Cast->getSrcTy()->getScalarSizeInBits() - 1; 633 } 634 635 if (const auto* Cast = dyn_cast<ZExtInst>(Val)) { 636 isSigned = false; 637 return Cast->getSrcTy()->getScalarSizeInBits(); 638 } 639 640 isSigned = false; 641 return Val->getType()->getScalarSizeInBits(); 642 } 643 isStridedAccess(const SCEV * Ptr)644 bool isStridedAccess(const SCEV *Ptr) { 645 return Ptr && isa<SCEVAddRecExpr>(Ptr); 646 } 647 getConstantStrideStep(ScalarEvolution * SE,const SCEV * Ptr)648 const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE, 649 const SCEV *Ptr) { 650 if (!isStridedAccess(Ptr)) 651 return nullptr; 652 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr); 653 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE)); 654 } 655 isConstantStridedAccessLessThan(ScalarEvolution * SE,const SCEV * Ptr,int64_t MergeDistance)656 bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, 657 int64_t MergeDistance) { 658 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr); 659 if (!Step) 660 return false; 661 APInt StrideVal = Step->getAPInt(); 662 if (StrideVal.getBitWidth() > 64) 663 return false; 664 // FIXME: Need to take absolute value for negative stride case. 665 return StrideVal.getSExtValue() < MergeDistance; 666 } 667 }; 668 669 /// CRTP base class for use as a mix-in that aids implementing 670 /// a TargetTransformInfo-compatible class. 671 template <typename T> 672 class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { 673 private: 674 typedef TargetTransformInfoImplBase BaseT; 675 676 protected: TargetTransformInfoImplCRTPBase(const DataLayout & DL)677 explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {} 678 679 public: 680 using BaseT::getCallCost; 681 getCallCost(const Function * F,int NumArgs)682 unsigned getCallCost(const Function *F, int NumArgs) { 683 assert(F && "A concrete function must be provided to this routine."); 684 685 if (NumArgs < 0) 686 // Set the argument number to the number of explicit arguments in the 687 // function. 688 NumArgs = F->arg_size(); 689 690 if (Intrinsic::ID IID = F->getIntrinsicID()) { 691 FunctionType *FTy = F->getFunctionType(); 692 SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end()); 693 return static_cast<T *>(this) 694 ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys); 695 } 696 697 if (!static_cast<T *>(this)->isLoweredToCall(F)) 698 return TTI::TCC_Basic; // Give a basic cost if it will be lowered 699 // directly. 700 701 return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs); 702 } 703 getCallCost(const Function * F,ArrayRef<const Value * > Arguments)704 unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments) { 705 // Simply delegate to generic handling of the call. 706 // FIXME: We should use instsimplify or something else to catch calls which 707 // will constant fold with these arguments. 708 return static_cast<T *>(this)->getCallCost(F, Arguments.size()); 709 } 710 711 using BaseT::getGEPCost; 712 getGEPCost(Type * PointeeType,const Value * Ptr,ArrayRef<const Value * > Operands)713 int getGEPCost(Type *PointeeType, const Value *Ptr, 714 ArrayRef<const Value *> Operands) { 715 const GlobalValue *BaseGV = nullptr; 716 if (Ptr != nullptr) { 717 // TODO: will remove this when pointers have an opaque type. 718 assert(Ptr->getType()->getScalarType()->getPointerElementType() == 719 PointeeType && 720 "explicit pointee type doesn't match operand's pointee type"); 721 BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts()); 722 } 723 bool HasBaseReg = (BaseGV == nullptr); 724 725 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType()); 726 APInt BaseOffset(PtrSizeBits, 0); 727 int64_t Scale = 0; 728 729 auto GTI = gep_type_begin(PointeeType, Operands); 730 Type *TargetType = nullptr; 731 732 // Handle the case where the GEP instruction has a single operand, 733 // the basis, therefore TargetType is a nullptr. 734 if (Operands.empty()) 735 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic; 736 737 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) { 738 TargetType = GTI.getIndexedType(); 739 // We assume that the cost of Scalar GEP with constant index and the 740 // cost of Vector GEP with splat constant index are the same. 741 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); 742 if (!ConstIdx) 743 if (auto Splat = getSplatValue(*I)) 744 ConstIdx = dyn_cast<ConstantInt>(Splat); 745 if (StructType *STy = GTI.getStructTypeOrNull()) { 746 // For structures the index is always splat or scalar constant 747 assert(ConstIdx && "Unexpected GEP index"); 748 uint64_t Field = ConstIdx->getZExtValue(); 749 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); 750 } else { 751 int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType()); 752 if (ConstIdx) { 753 BaseOffset += 754 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize; 755 } else { 756 // Needs scale register. 757 if (Scale != 0) 758 // No addressing mode takes two scale registers. 759 return TTI::TCC_Basic; 760 Scale = ElementSize; 761 } 762 } 763 } 764 765 // Assumes the address space is 0 when Ptr is nullptr. 766 unsigned AS = 767 (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace()); 768 769 if (static_cast<T *>(this)->isLegalAddressingMode( 770 TargetType, const_cast<GlobalValue *>(BaseGV), 771 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS)) 772 return TTI::TCC_Free; 773 return TTI::TCC_Basic; 774 } 775 776 using BaseT::getIntrinsicCost; 777 getIntrinsicCost(Intrinsic::ID IID,Type * RetTy,ArrayRef<const Value * > Arguments)778 unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, 779 ArrayRef<const Value *> Arguments) { 780 // Delegate to the generic intrinsic handling code. This mostly provides an 781 // opportunity for targets to (for example) special case the cost of 782 // certain intrinsics based on constants used as arguments. 783 SmallVector<Type *, 8> ParamTys; 784 ParamTys.reserve(Arguments.size()); 785 for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) 786 ParamTys.push_back(Arguments[Idx]->getType()); 787 return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys); 788 } 789 getUserCost(const User * U,ArrayRef<const Value * > Operands)790 unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) { 791 if (isa<PHINode>(U)) 792 return TTI::TCC_Free; // Model all PHI nodes as free. 793 794 // Static alloca doesn't generate target instructions. 795 if (auto *A = dyn_cast<AllocaInst>(U)) 796 if (A->isStaticAlloca()) 797 return TTI::TCC_Free; 798 799 if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) { 800 return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(), 801 GEP->getPointerOperand(), 802 Operands.drop_front()); 803 } 804 805 if (auto CS = ImmutableCallSite(U)) { 806 const Function *F = CS.getCalledFunction(); 807 if (!F) { 808 // Just use the called value type. 809 Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); 810 return static_cast<T *>(this) 811 ->getCallCost(cast<FunctionType>(FTy), CS.arg_size()); 812 } 813 814 SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end()); 815 return static_cast<T *>(this)->getCallCost(F, Arguments); 816 } 817 818 if (const CastInst *CI = dyn_cast<CastInst>(U)) { 819 // Result of a cmp instruction is often extended (to be used by other 820 // cmp instructions, logical or return instructions). These are usually 821 // nop on most sane targets. 822 if (isa<CmpInst>(CI->getOperand(0))) 823 return TTI::TCC_Free; 824 if (isa<SExtInst>(CI) || isa<ZExtInst>(CI) || isa<FPExtInst>(CI)) 825 return static_cast<T *>(this)->getExtCost(CI, Operands.back()); 826 } 827 828 return static_cast<T *>(this)->getOperationCost( 829 Operator::getOpcode(U), U->getType(), 830 U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr); 831 } 832 getInstructionLatency(const Instruction * I)833 int getInstructionLatency(const Instruction *I) { 834 SmallVector<const Value *, 4> Operands(I->value_op_begin(), 835 I->value_op_end()); 836 if (getUserCost(I, Operands) == TTI::TCC_Free) 837 return 0; 838 839 if (isa<LoadInst>(I)) 840 return 4; 841 842 Type *DstTy = I->getType(); 843 844 // Usually an intrinsic is a simple instruction. 845 // A real function call is much slower. 846 if (auto *CI = dyn_cast<CallInst>(I)) { 847 const Function *F = CI->getCalledFunction(); 848 if (!F || static_cast<T *>(this)->isLoweredToCall(F)) 849 return 40; 850 // Some intrinsics return a value and a flag, we use the value type 851 // to decide its latency. 852 if (StructType* StructTy = dyn_cast<StructType>(DstTy)) 853 DstTy = StructTy->getElementType(0); 854 // Fall through to simple instructions. 855 } 856 857 if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy)) 858 DstTy = VectorTy->getElementType(); 859 if (DstTy->isFloatingPointTy()) 860 return 3; 861 862 return 1; 863 } 864 }; 865 } 866 867 #endif 868