1 //===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// \file 9 /// This file provides helpers for the implementation of 10 /// a TargetTransformInfo-conforming class. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H 15 #define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H 16 17 #include "llvm/Analysis/ScalarEvolutionExpressions.h" 18 #include "llvm/Analysis/TargetTransformInfo.h" 19 #include "llvm/Analysis/VectorUtils.h" 20 #include "llvm/IR/DataLayout.h" 21 #include "llvm/IR/GetElementPtrTypeIterator.h" 22 #include "llvm/IR/IntrinsicInst.h" 23 #include "llvm/IR/Operator.h" 24 #include "llvm/IR/PatternMatch.h" 25 #include <optional> 26 #include <utility> 27 28 namespace llvm { 29 30 class Function; 31 32 /// Base class for use as a mix-in that aids implementing 33 /// a TargetTransformInfo-compatible class. 34 class TargetTransformInfoImplBase { 35 protected: 36 typedef TargetTransformInfo TTI; 37 38 const DataLayout &DL; 39 TargetTransformInfoImplBase(const DataLayout & DL)40 explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {} 41 42 public: 43 // Provide value semantics. MSVC requires that we spell all of these out. 44 TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg) = default; TargetTransformInfoImplBase(TargetTransformInfoImplBase && Arg)45 TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {} 46 getDataLayout()47 const DataLayout &getDataLayout() const { return DL; } 48 getGEPCost(Type * PointeeType,const Value * Ptr,ArrayRef<const Value * > Operands,Type * AccessType,TTI::TargetCostKind CostKind)49 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, 50 ArrayRef<const Value *> Operands, Type *AccessType, 51 TTI::TargetCostKind CostKind) const { 52 // In the basic model, we just assume that all-constant GEPs will be folded 53 // into their uses via addressing modes. 54 for (const Value *Operand : Operands) 55 if (!isa<Constant>(Operand)) 56 return TTI::TCC_Basic; 57 58 return TTI::TCC_Free; 59 } 60 getEstimatedNumberOfCaseClusters(const SwitchInst & SI,unsigned & JTSize,ProfileSummaryInfo * PSI,BlockFrequencyInfo * BFI)61 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI, 62 unsigned &JTSize, 63 ProfileSummaryInfo *PSI, 64 BlockFrequencyInfo *BFI) const { 65 (void)PSI; 66 (void)BFI; 67 JTSize = 0; 68 return SI.getNumCases(); 69 } 70 getInliningThresholdMultiplier()71 unsigned getInliningThresholdMultiplier() const { return 1; } getInliningCostBenefitAnalysisSavingsMultiplier()72 unsigned getInliningCostBenefitAnalysisSavingsMultiplier() const { return 8; } getInliningCostBenefitAnalysisProfitableMultiplier()73 unsigned getInliningCostBenefitAnalysisProfitableMultiplier() const { 74 return 8; 75 } adjustInliningThreshold(const CallBase * CB)76 unsigned adjustInliningThreshold(const CallBase *CB) const { return 0; } getCallerAllocaCost(const CallBase * CB,const AllocaInst * AI)77 unsigned getCallerAllocaCost(const CallBase *CB, const AllocaInst *AI) const { 78 return 0; 79 }; 80 getInlinerVectorBonusPercent()81 int getInlinerVectorBonusPercent() const { return 150; } 82 getMemcpyCost(const Instruction * I)83 InstructionCost getMemcpyCost(const Instruction *I) const { 84 return TTI::TCC_Expensive; 85 } 86 getMaxMemIntrinsicInlineSizeThreshold()87 uint64_t getMaxMemIntrinsicInlineSizeThreshold() const { 88 return 64; 89 } 90 91 // Although this default value is arbitrary, it is not random. It is assumed 92 // that a condition that evaluates the same way by a higher percentage than 93 // this is best represented as control flow. Therefore, the default value N 94 // should be set such that the win from N% correct executions is greater than 95 // the loss from (100 - N)% mispredicted executions for the majority of 96 // intended targets. getPredictableBranchThreshold()97 BranchProbability getPredictableBranchThreshold() const { 98 return BranchProbability(99, 100); 99 } 100 101 bool hasBranchDivergence(const Function *F = nullptr) const { return false; } 102 isSourceOfDivergence(const Value * V)103 bool isSourceOfDivergence(const Value *V) const { return false; } 104 isAlwaysUniform(const Value * V)105 bool isAlwaysUniform(const Value *V) const { return false; } 106 isValidAddrSpaceCast(unsigned FromAS,unsigned ToAS)107 bool isValidAddrSpaceCast(unsigned FromAS, unsigned ToAS) const { 108 return false; 109 } 110 addrspacesMayAlias(unsigned AS0,unsigned AS1)111 bool addrspacesMayAlias(unsigned AS0, unsigned AS1) const { 112 return true; 113 } 114 getFlatAddressSpace()115 unsigned getFlatAddressSpace() const { return -1; } 116 collectFlatAddressOperands(SmallVectorImpl<int> & OpIndexes,Intrinsic::ID IID)117 bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes, 118 Intrinsic::ID IID) const { 119 return false; 120 } 121 isNoopAddrSpaceCast(unsigned,unsigned)122 bool isNoopAddrSpaceCast(unsigned, unsigned) const { return false; } canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS)123 bool canHaveNonUndefGlobalInitializerInAddressSpace(unsigned AS) const { 124 return AS == 0; 125 }; 126 getAssumedAddrSpace(const Value * V)127 unsigned getAssumedAddrSpace(const Value *V) const { return -1; } 128 isSingleThreaded()129 bool isSingleThreaded() const { return false; } 130 131 std::pair<const Value *, unsigned> getPredicatedAddrSpace(const Value * V)132 getPredicatedAddrSpace(const Value *V) const { 133 return std::make_pair(nullptr, -1); 134 } 135 rewriteIntrinsicWithAddressSpace(IntrinsicInst * II,Value * OldV,Value * NewV)136 Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV, 137 Value *NewV) const { 138 return nullptr; 139 } 140 isLoweredToCall(const Function * F)141 bool isLoweredToCall(const Function *F) const { 142 assert(F && "A concrete function must be provided to this routine."); 143 144 // FIXME: These should almost certainly not be handled here, and instead 145 // handled with the help of TLI or the target itself. This was largely 146 // ported from existing analysis heuristics here so that such refactorings 147 // can take place in the future. 148 149 if (F->isIntrinsic()) 150 return false; 151 152 if (F->hasLocalLinkage() || !F->hasName()) 153 return true; 154 155 StringRef Name = F->getName(); 156 157 // These will all likely lower to a single selection DAG node. 158 if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || 159 Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || 160 Name == "fmin" || Name == "fminf" || Name == "fminl" || 161 Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" || 162 Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || 163 Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") 164 return false; 165 166 // These are all likely to be optimized into something smaller. 167 if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || 168 Name == "exp2l" || Name == "exp2f" || Name == "floor" || 169 Name == "floorf" || Name == "ceil" || Name == "round" || 170 Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" || 171 Name == "llabs") 172 return false; 173 174 return true; 175 } 176 isHardwareLoopProfitable(Loop * L,ScalarEvolution & SE,AssumptionCache & AC,TargetLibraryInfo * LibInfo,HardwareLoopInfo & HWLoopInfo)177 bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE, 178 AssumptionCache &AC, TargetLibraryInfo *LibInfo, 179 HardwareLoopInfo &HWLoopInfo) const { 180 return false; 181 } 182 preferPredicateOverEpilogue(TailFoldingInfo * TFI)183 bool preferPredicateOverEpilogue(TailFoldingInfo *TFI) const { return false; } 184 185 TailFoldingStyle 186 getPreferredTailFoldingStyle(bool IVUpdateMayOverflow = true) const { 187 return TailFoldingStyle::DataWithoutLaneMask; 188 } 189 instCombineIntrinsic(InstCombiner & IC,IntrinsicInst & II)190 std::optional<Instruction *> instCombineIntrinsic(InstCombiner &IC, 191 IntrinsicInst &II) const { 192 return std::nullopt; 193 } 194 195 std::optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner & IC,IntrinsicInst & II,APInt DemandedMask,KnownBits & Known,bool & KnownBitsComputed)196 simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II, 197 APInt DemandedMask, KnownBits &Known, 198 bool &KnownBitsComputed) const { 199 return std::nullopt; 200 } 201 simplifyDemandedVectorEltsIntrinsic(InstCombiner & IC,IntrinsicInst & II,APInt DemandedElts,APInt & UndefElts,APInt & UndefElts2,APInt & UndefElts3,std::function<void (Instruction *,unsigned,APInt,APInt &)> SimplifyAndSetOp)202 std::optional<Value *> simplifyDemandedVectorEltsIntrinsic( 203 InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts, 204 APInt &UndefElts2, APInt &UndefElts3, 205 std::function<void(Instruction *, unsigned, APInt, APInt &)> 206 SimplifyAndSetOp) const { 207 return std::nullopt; 208 } 209 getUnrollingPreferences(Loop *,ScalarEvolution &,TTI::UnrollingPreferences &,OptimizationRemarkEmitter *)210 void getUnrollingPreferences(Loop *, ScalarEvolution &, 211 TTI::UnrollingPreferences &, 212 OptimizationRemarkEmitter *) const {} 213 getPeelingPreferences(Loop *,ScalarEvolution &,TTI::PeelingPreferences &)214 void getPeelingPreferences(Loop *, ScalarEvolution &, 215 TTI::PeelingPreferences &) const {} 216 isLegalAddImmediate(int64_t Imm)217 bool isLegalAddImmediate(int64_t Imm) const { return false; } 218 isLegalICmpImmediate(int64_t Imm)219 bool isLegalICmpImmediate(int64_t Imm) const { return false; } 220 221 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, 222 bool HasBaseReg, int64_t Scale, unsigned AddrSpace, 223 Instruction *I = nullptr) const { 224 // Guess that only reg and reg+reg addressing is allowed. This heuristic is 225 // taken from the implementation of LSR. 226 return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1); 227 } 228 isLSRCostLess(const TTI::LSRCost & C1,const TTI::LSRCost & C2)229 bool isLSRCostLess(const TTI::LSRCost &C1, const TTI::LSRCost &C2) const { 230 return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds, 231 C1.ScaleCost, C1.ImmCost, C1.SetupCost) < 232 std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds, 233 C2.ScaleCost, C2.ImmCost, C2.SetupCost); 234 } 235 isNumRegsMajorCostOfLSR()236 bool isNumRegsMajorCostOfLSR() const { return true; } 237 shouldFoldTerminatingConditionAfterLSR()238 bool shouldFoldTerminatingConditionAfterLSR() const { return false; } 239 isProfitableLSRChainElement(Instruction * I)240 bool isProfitableLSRChainElement(Instruction *I) const { return false; } 241 canMacroFuseCmp()242 bool canMacroFuseCmp() const { return false; } 243 canSaveCmp(Loop * L,BranchInst ** BI,ScalarEvolution * SE,LoopInfo * LI,DominatorTree * DT,AssumptionCache * AC,TargetLibraryInfo * LibInfo)244 bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI, 245 DominatorTree *DT, AssumptionCache *AC, 246 TargetLibraryInfo *LibInfo) const { 247 return false; 248 } 249 250 TTI::AddressingModeKind getPreferredAddressingMode(const Loop * L,ScalarEvolution * SE)251 getPreferredAddressingMode(const Loop *L, ScalarEvolution *SE) const { 252 return TTI::AMK_None; 253 } 254 isLegalMaskedStore(Type * DataType,Align Alignment)255 bool isLegalMaskedStore(Type *DataType, Align Alignment) const { 256 return false; 257 } 258 isLegalMaskedLoad(Type * DataType,Align Alignment)259 bool isLegalMaskedLoad(Type *DataType, Align Alignment) const { 260 return false; 261 } 262 isLegalNTStore(Type * DataType,Align Alignment)263 bool isLegalNTStore(Type *DataType, Align Alignment) const { 264 // By default, assume nontemporal memory stores are available for stores 265 // that are aligned and have a size that is a power of 2. 266 unsigned DataSize = DL.getTypeStoreSize(DataType); 267 return Alignment >= DataSize && isPowerOf2_32(DataSize); 268 } 269 isLegalNTLoad(Type * DataType,Align Alignment)270 bool isLegalNTLoad(Type *DataType, Align Alignment) const { 271 // By default, assume nontemporal memory loads are available for loads that 272 // are aligned and have a size that is a power of 2. 273 unsigned DataSize = DL.getTypeStoreSize(DataType); 274 return Alignment >= DataSize && isPowerOf2_32(DataSize); 275 } 276 isLegalBroadcastLoad(Type * ElementTy,ElementCount NumElements)277 bool isLegalBroadcastLoad(Type *ElementTy, ElementCount NumElements) const { 278 return false; 279 } 280 isLegalMaskedScatter(Type * DataType,Align Alignment)281 bool isLegalMaskedScatter(Type *DataType, Align Alignment) const { 282 return false; 283 } 284 isLegalMaskedGather(Type * DataType,Align Alignment)285 bool isLegalMaskedGather(Type *DataType, Align Alignment) const { 286 return false; 287 } 288 forceScalarizeMaskedGather(VectorType * DataType,Align Alignment)289 bool forceScalarizeMaskedGather(VectorType *DataType, Align Alignment) const { 290 return false; 291 } 292 forceScalarizeMaskedScatter(VectorType * DataType,Align Alignment)293 bool forceScalarizeMaskedScatter(VectorType *DataType, 294 Align Alignment) const { 295 return false; 296 } 297 isLegalMaskedCompressStore(Type * DataType)298 bool isLegalMaskedCompressStore(Type *DataType) const { return false; } 299 isLegalAltInstr(VectorType * VecTy,unsigned Opcode0,unsigned Opcode1,const SmallBitVector & OpcodeMask)300 bool isLegalAltInstr(VectorType *VecTy, unsigned Opcode0, unsigned Opcode1, 301 const SmallBitVector &OpcodeMask) const { 302 return false; 303 } 304 isLegalMaskedExpandLoad(Type * DataType)305 bool isLegalMaskedExpandLoad(Type *DataType) const { return false; } 306 enableOrderedReductions()307 bool enableOrderedReductions() const { return false; } 308 hasDivRemOp(Type * DataType,bool IsSigned)309 bool hasDivRemOp(Type *DataType, bool IsSigned) const { return false; } 310 hasVolatileVariant(Instruction * I,unsigned AddrSpace)311 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const { 312 return false; 313 } 314 prefersVectorizedAddressing()315 bool prefersVectorizedAddressing() const { return true; } 316 getScalingFactorCost(Type * Ty,GlobalValue * BaseGV,int64_t BaseOffset,bool HasBaseReg,int64_t Scale,unsigned AddrSpace)317 InstructionCost getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, 318 int64_t BaseOffset, bool HasBaseReg, 319 int64_t Scale, 320 unsigned AddrSpace) const { 321 // Guess that all legal addressing mode are free. 322 if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale, 323 AddrSpace)) 324 return 0; 325 return -1; 326 } 327 LSRWithInstrQueries()328 bool LSRWithInstrQueries() const { return false; } 329 isTruncateFree(Type * Ty1,Type * Ty2)330 bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; } 331 isProfitableToHoist(Instruction * I)332 bool isProfitableToHoist(Instruction *I) const { return true; } 333 useAA()334 bool useAA() const { return false; } 335 isTypeLegal(Type * Ty)336 bool isTypeLegal(Type *Ty) const { return false; } 337 getRegUsageForType(Type * Ty)338 unsigned getRegUsageForType(Type *Ty) const { return 1; } 339 shouldBuildLookupTables()340 bool shouldBuildLookupTables() const { return true; } 341 shouldBuildLookupTablesForConstant(Constant * C)342 bool shouldBuildLookupTablesForConstant(Constant *C) const { return true; } 343 shouldBuildRelLookupTables()344 bool shouldBuildRelLookupTables() const { return false; } 345 useColdCCForColdCall(Function & F)346 bool useColdCCForColdCall(Function &F) const { return false; } 347 getScalarizationOverhead(VectorType * Ty,const APInt & DemandedElts,bool Insert,bool Extract,TTI::TargetCostKind CostKind)348 InstructionCost getScalarizationOverhead(VectorType *Ty, 349 const APInt &DemandedElts, 350 bool Insert, bool Extract, 351 TTI::TargetCostKind CostKind) const { 352 return 0; 353 } 354 355 InstructionCost getOperandsScalarizationOverhead(ArrayRef<const Value * > Args,ArrayRef<Type * > Tys,TTI::TargetCostKind CostKind)356 getOperandsScalarizationOverhead(ArrayRef<const Value *> Args, 357 ArrayRef<Type *> Tys, 358 TTI::TargetCostKind CostKind) const { 359 return 0; 360 } 361 supportsEfficientVectorElementLoadStore()362 bool supportsEfficientVectorElementLoadStore() const { return false; } 363 supportsTailCalls()364 bool supportsTailCalls() const { return true; } 365 supportsTailCallFor(const CallBase * CB)366 bool supportsTailCallFor(const CallBase *CB) const { 367 return supportsTailCalls(); 368 } 369 enableAggressiveInterleaving(bool LoopHasReductions)370 bool enableAggressiveInterleaving(bool LoopHasReductions) const { 371 return false; 372 } 373 enableMemCmpExpansion(bool OptSize,bool IsZeroCmp)374 TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize, 375 bool IsZeroCmp) const { 376 return {}; 377 } 378 enableSelectOptimize()379 bool enableSelectOptimize() const { return true; } 380 shouldTreatInstructionLikeSelect(const Instruction * I)381 bool shouldTreatInstructionLikeSelect(const Instruction *I) { 382 // If the select is a logical-and/logical-or then it is better treated as a 383 // and/or by the backend. 384 using namespace llvm::PatternMatch; 385 return isa<SelectInst>(I) && 386 !match(I, m_CombineOr(m_LogicalAnd(m_Value(), m_Value()), 387 m_LogicalOr(m_Value(), m_Value()))); 388 } 389 enableInterleavedAccessVectorization()390 bool enableInterleavedAccessVectorization() const { return false; } 391 enableMaskedInterleavedAccessVectorization()392 bool enableMaskedInterleavedAccessVectorization() const { return false; } 393 isFPVectorizationPotentiallyUnsafe()394 bool isFPVectorizationPotentiallyUnsafe() const { return false; } 395 allowsMisalignedMemoryAccesses(LLVMContext & Context,unsigned BitWidth,unsigned AddressSpace,Align Alignment,unsigned * Fast)396 bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, 397 unsigned AddressSpace, Align Alignment, 398 unsigned *Fast) const { 399 return false; 400 } 401 getPopcntSupport(unsigned IntTyWidthInBit)402 TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const { 403 return TTI::PSK_Software; 404 } 405 haveFastSqrt(Type * Ty)406 bool haveFastSqrt(Type *Ty) const { return false; } 407 isExpensiveToSpeculativelyExecute(const Instruction * I)408 bool isExpensiveToSpeculativelyExecute(const Instruction *I) { return true; } 409 isFCmpOrdCheaperThanFCmpZero(Type * Ty)410 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const { return true; } 411 getFPOpCost(Type * Ty)412 InstructionCost getFPOpCost(Type *Ty) const { 413 return TargetTransformInfo::TCC_Basic; 414 } 415 getIntImmCodeSizeCost(unsigned Opcode,unsigned Idx,const APInt & Imm,Type * Ty)416 InstructionCost getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, 417 const APInt &Imm, Type *Ty) const { 418 return 0; 419 } 420 getIntImmCost(const APInt & Imm,Type * Ty,TTI::TargetCostKind CostKind)421 InstructionCost getIntImmCost(const APInt &Imm, Type *Ty, 422 TTI::TargetCostKind CostKind) const { 423 return TTI::TCC_Basic; 424 } 425 426 InstructionCost getIntImmCostInst(unsigned Opcode, unsigned Idx, 427 const APInt &Imm, Type *Ty, 428 TTI::TargetCostKind CostKind, 429 Instruction *Inst = nullptr) const { 430 return TTI::TCC_Free; 431 } 432 getIntImmCostIntrin(Intrinsic::ID IID,unsigned Idx,const APInt & Imm,Type * Ty,TTI::TargetCostKind CostKind)433 InstructionCost getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, 434 const APInt &Imm, Type *Ty, 435 TTI::TargetCostKind CostKind) const { 436 return TTI::TCC_Free; 437 } 438 preferToKeepConstantsAttached(const Instruction & Inst,const Function & Fn)439 bool preferToKeepConstantsAttached(const Instruction &Inst, 440 const Function &Fn) const { 441 return false; 442 } 443 getNumberOfRegisters(unsigned ClassID)444 unsigned getNumberOfRegisters(unsigned ClassID) const { return 8; } 445 446 unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const { 447 return Vector ? 1 : 0; 448 }; 449 getRegisterClassName(unsigned ClassID)450 const char *getRegisterClassName(unsigned ClassID) const { 451 switch (ClassID) { 452 default: 453 return "Generic::Unknown Register Class"; 454 case 0: 455 return "Generic::ScalarRC"; 456 case 1: 457 return "Generic::VectorRC"; 458 } 459 } 460 getRegisterBitWidth(TargetTransformInfo::RegisterKind K)461 TypeSize getRegisterBitWidth(TargetTransformInfo::RegisterKind K) const { 462 return TypeSize::getFixed(32); 463 } 464 getMinVectorRegisterBitWidth()465 unsigned getMinVectorRegisterBitWidth() const { return 128; } 466 getMaxVScale()467 std::optional<unsigned> getMaxVScale() const { return std::nullopt; } getVScaleForTuning()468 std::optional<unsigned> getVScaleForTuning() const { return std::nullopt; } isVScaleKnownToBeAPowerOfTwo()469 bool isVScaleKnownToBeAPowerOfTwo() const { return false; } 470 471 bool shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K)472 shouldMaximizeVectorBandwidth(TargetTransformInfo::RegisterKind K) const { 473 return false; 474 } 475 getMinimumVF(unsigned ElemWidth,bool IsScalable)476 ElementCount getMinimumVF(unsigned ElemWidth, bool IsScalable) const { 477 return ElementCount::get(0, IsScalable); 478 } 479 getMaximumVF(unsigned ElemWidth,unsigned Opcode)480 unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const { return 0; } getStoreMinimumVF(unsigned VF,Type *,Type *)481 unsigned getStoreMinimumVF(unsigned VF, Type *, Type *) const { return VF; } 482 shouldConsiderAddressTypePromotion(const Instruction & I,bool & AllowPromotionWithoutCommonHeader)483 bool shouldConsiderAddressTypePromotion( 484 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const { 485 AllowPromotionWithoutCommonHeader = false; 486 return false; 487 } 488 getCacheLineSize()489 unsigned getCacheLineSize() const { return 0; } 490 std::optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level)491 getCacheSize(TargetTransformInfo::CacheLevel Level) const { 492 switch (Level) { 493 case TargetTransformInfo::CacheLevel::L1D: 494 [[fallthrough]]; 495 case TargetTransformInfo::CacheLevel::L2D: 496 return std::nullopt; 497 } 498 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); 499 } 500 501 std::optional<unsigned> getCacheAssociativity(TargetTransformInfo::CacheLevel Level)502 getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const { 503 switch (Level) { 504 case TargetTransformInfo::CacheLevel::L1D: 505 [[fallthrough]]; 506 case TargetTransformInfo::CacheLevel::L2D: 507 return std::nullopt; 508 } 509 510 llvm_unreachable("Unknown TargetTransformInfo::CacheLevel"); 511 } 512 getMinPageSize()513 std::optional<unsigned> getMinPageSize() const { return {}; } 514 getPrefetchDistance()515 unsigned getPrefetchDistance() const { return 0; } getMinPrefetchStride(unsigned NumMemAccesses,unsigned NumStridedMemAccesses,unsigned NumPrefetches,bool HasCall)516 unsigned getMinPrefetchStride(unsigned NumMemAccesses, 517 unsigned NumStridedMemAccesses, 518 unsigned NumPrefetches, bool HasCall) const { 519 return 1; 520 } getMaxPrefetchIterationsAhead()521 unsigned getMaxPrefetchIterationsAhead() const { return UINT_MAX; } enableWritePrefetching()522 bool enableWritePrefetching() const { return false; } shouldPrefetchAddressSpace(unsigned AS)523 bool shouldPrefetchAddressSpace(unsigned AS) const { return !AS; } 524 getMaxInterleaveFactor(ElementCount VF)525 unsigned getMaxInterleaveFactor(ElementCount VF) const { return 1; } 526 527 InstructionCost getArithmeticInstrCost( 528 unsigned Opcode, Type *Ty, TTI::TargetCostKind CostKind, 529 TTI::OperandValueInfo Opd1Info, TTI::OperandValueInfo Opd2Info, 530 ArrayRef<const Value *> Args, 531 const Instruction *CxtI = nullptr) const { 532 // Widenable conditions will eventually lower into constants, so some 533 // operations with them will be trivially optimized away. 534 auto IsWidenableCondition = [](const Value *V) { 535 if (auto *II = dyn_cast<IntrinsicInst>(V)) 536 if (II->getIntrinsicID() == Intrinsic::experimental_widenable_condition) 537 return true; 538 return false; 539 }; 540 // FIXME: A number of transformation tests seem to require these values 541 // which seems a little odd for how arbitary there are. 542 switch (Opcode) { 543 default: 544 break; 545 case Instruction::FDiv: 546 case Instruction::FRem: 547 case Instruction::SDiv: 548 case Instruction::SRem: 549 case Instruction::UDiv: 550 case Instruction::URem: 551 // FIXME: Unlikely to be true for CodeSize. 552 return TTI::TCC_Expensive; 553 case Instruction::And: 554 case Instruction::Or: 555 if (any_of(Args, IsWidenableCondition)) 556 return TTI::TCC_Free; 557 break; 558 } 559 560 // Assume a 3cy latency for fp arithmetic ops. 561 if (CostKind == TTI::TCK_Latency) 562 if (Ty->getScalarType()->isFloatingPointTy()) 563 return 3; 564 565 return 1; 566 } 567 getAltInstrCost(VectorType * VecTy,unsigned Opcode0,unsigned Opcode1,const SmallBitVector & OpcodeMask,TTI::TargetCostKind CostKind)568 InstructionCost getAltInstrCost(VectorType *VecTy, unsigned Opcode0, 569 unsigned Opcode1, 570 const SmallBitVector &OpcodeMask, 571 TTI::TargetCostKind CostKind) const { 572 return InstructionCost::getInvalid(); 573 } 574 575 InstructionCost 576 getShuffleCost(TTI::ShuffleKind Kind, VectorType *Ty, ArrayRef<int> Mask, 577 TTI::TargetCostKind CostKind, int Index, VectorType *SubTp, 578 ArrayRef<const Value *> Args = std::nullopt) const { 579 return 1; 580 } 581 getCastInstrCost(unsigned Opcode,Type * Dst,Type * Src,TTI::CastContextHint CCH,TTI::TargetCostKind CostKind,const Instruction * I)582 InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, 583 TTI::CastContextHint CCH, 584 TTI::TargetCostKind CostKind, 585 const Instruction *I) const { 586 switch (Opcode) { 587 default: 588 break; 589 case Instruction::IntToPtr: { 590 unsigned SrcSize = Src->getScalarSizeInBits(); 591 if (DL.isLegalInteger(SrcSize) && 592 SrcSize <= DL.getPointerTypeSizeInBits(Dst)) 593 return 0; 594 break; 595 } 596 case Instruction::PtrToInt: { 597 unsigned DstSize = Dst->getScalarSizeInBits(); 598 if (DL.isLegalInteger(DstSize) && 599 DstSize >= DL.getPointerTypeSizeInBits(Src)) 600 return 0; 601 break; 602 } 603 case Instruction::BitCast: 604 if (Dst == Src || (Dst->isPointerTy() && Src->isPointerTy())) 605 // Identity and pointer-to-pointer casts are free. 606 return 0; 607 break; 608 case Instruction::Trunc: { 609 // trunc to a native type is free (assuming the target has compare and 610 // shift-right of the same width). 611 TypeSize DstSize = DL.getTypeSizeInBits(Dst); 612 if (!DstSize.isScalable() && DL.isLegalInteger(DstSize.getFixedValue())) 613 return 0; 614 break; 615 } 616 } 617 return 1; 618 } 619 getExtractWithExtendCost(unsigned Opcode,Type * Dst,VectorType * VecTy,unsigned Index)620 InstructionCost getExtractWithExtendCost(unsigned Opcode, Type *Dst, 621 VectorType *VecTy, 622 unsigned Index) const { 623 return 1; 624 } 625 626 InstructionCost getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind, 627 const Instruction *I = nullptr) const { 628 // A phi would be free, unless we're costing the throughput because it 629 // will require a register. 630 if (Opcode == Instruction::PHI && CostKind != TTI::TCK_RecipThroughput) 631 return 0; 632 return 1; 633 } 634 getCmpSelInstrCost(unsigned Opcode,Type * ValTy,Type * CondTy,CmpInst::Predicate VecPred,TTI::TargetCostKind CostKind,const Instruction * I)635 InstructionCost getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy, 636 CmpInst::Predicate VecPred, 637 TTI::TargetCostKind CostKind, 638 const Instruction *I) const { 639 return 1; 640 } 641 getVectorInstrCost(unsigned Opcode,Type * Val,TTI::TargetCostKind CostKind,unsigned Index,Value * Op0,Value * Op1)642 InstructionCost getVectorInstrCost(unsigned Opcode, Type *Val, 643 TTI::TargetCostKind CostKind, 644 unsigned Index, Value *Op0, 645 Value *Op1) const { 646 return 1; 647 } 648 getVectorInstrCost(const Instruction & I,Type * Val,TTI::TargetCostKind CostKind,unsigned Index)649 InstructionCost getVectorInstrCost(const Instruction &I, Type *Val, 650 TTI::TargetCostKind CostKind, 651 unsigned Index) const { 652 return 1; 653 } 654 getReplicationShuffleCost(Type * EltTy,int ReplicationFactor,int VF,const APInt & DemandedDstElts,TTI::TargetCostKind CostKind)655 unsigned getReplicationShuffleCost(Type *EltTy, int ReplicationFactor, int VF, 656 const APInt &DemandedDstElts, 657 TTI::TargetCostKind CostKind) { 658 return 1; 659 } 660 getMemoryOpCost(unsigned Opcode,Type * Src,Align Alignment,unsigned AddressSpace,TTI::TargetCostKind CostKind,TTI::OperandValueInfo OpInfo,const Instruction * I)661 InstructionCost getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, 662 unsigned AddressSpace, 663 TTI::TargetCostKind CostKind, 664 TTI::OperandValueInfo OpInfo, 665 const Instruction *I) const { 666 return 1; 667 } 668 getVPMemoryOpCost(unsigned Opcode,Type * Src,Align Alignment,unsigned AddressSpace,TTI::TargetCostKind CostKind,const Instruction * I)669 InstructionCost getVPMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment, 670 unsigned AddressSpace, 671 TTI::TargetCostKind CostKind, 672 const Instruction *I) const { 673 return 1; 674 } 675 getMaskedMemoryOpCost(unsigned Opcode,Type * Src,Align Alignment,unsigned AddressSpace,TTI::TargetCostKind CostKind)676 InstructionCost getMaskedMemoryOpCost(unsigned Opcode, Type *Src, 677 Align Alignment, unsigned AddressSpace, 678 TTI::TargetCostKind CostKind) const { 679 return 1; 680 } 681 682 InstructionCost getGatherScatterOpCost(unsigned Opcode, Type *DataTy, 683 const Value *Ptr, bool VariableMask, 684 Align Alignment, 685 TTI::TargetCostKind CostKind, 686 const Instruction *I = nullptr) const { 687 return 1; 688 } 689 getInterleavedMemoryOpCost(unsigned Opcode,Type * VecTy,unsigned Factor,ArrayRef<unsigned> Indices,Align Alignment,unsigned AddressSpace,TTI::TargetCostKind CostKind,bool UseMaskForCond,bool UseMaskForGaps)690 unsigned getInterleavedMemoryOpCost( 691 unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices, 692 Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind, 693 bool UseMaskForCond, bool UseMaskForGaps) const { 694 return 1; 695 } 696 getIntrinsicInstrCost(const IntrinsicCostAttributes & ICA,TTI::TargetCostKind CostKind)697 InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, 698 TTI::TargetCostKind CostKind) const { 699 switch (ICA.getID()) { 700 default: 701 break; 702 case Intrinsic::annotation: 703 case Intrinsic::assume: 704 case Intrinsic::sideeffect: 705 case Intrinsic::pseudoprobe: 706 case Intrinsic::arithmetic_fence: 707 case Intrinsic::dbg_assign: 708 case Intrinsic::dbg_declare: 709 case Intrinsic::dbg_value: 710 case Intrinsic::dbg_label: 711 case Intrinsic::invariant_start: 712 case Intrinsic::invariant_end: 713 case Intrinsic::launder_invariant_group: 714 case Intrinsic::strip_invariant_group: 715 case Intrinsic::is_constant: 716 case Intrinsic::lifetime_start: 717 case Intrinsic::lifetime_end: 718 case Intrinsic::experimental_noalias_scope_decl: 719 case Intrinsic::objectsize: 720 case Intrinsic::ptr_annotation: 721 case Intrinsic::var_annotation: 722 case Intrinsic::experimental_gc_result: 723 case Intrinsic::experimental_gc_relocate: 724 case Intrinsic::coro_alloc: 725 case Intrinsic::coro_begin: 726 case Intrinsic::coro_free: 727 case Intrinsic::coro_end: 728 case Intrinsic::coro_frame: 729 case Intrinsic::coro_size: 730 case Intrinsic::coro_align: 731 case Intrinsic::coro_suspend: 732 case Intrinsic::coro_subfn_addr: 733 case Intrinsic::threadlocal_address: 734 case Intrinsic::experimental_widenable_condition: 735 case Intrinsic::ssa_copy: 736 // These intrinsics don't actually represent code after lowering. 737 return 0; 738 } 739 return 1; 740 } 741 getCallInstrCost(Function * F,Type * RetTy,ArrayRef<Type * > Tys,TTI::TargetCostKind CostKind)742 InstructionCost getCallInstrCost(Function *F, Type *RetTy, 743 ArrayRef<Type *> Tys, 744 TTI::TargetCostKind CostKind) const { 745 return 1; 746 } 747 748 // Assume that we have a register of the right size for the type. getNumberOfParts(Type * Tp)749 unsigned getNumberOfParts(Type *Tp) const { return 1; } 750 getAddressComputationCost(Type * Tp,ScalarEvolution *,const SCEV *)751 InstructionCost getAddressComputationCost(Type *Tp, ScalarEvolution *, 752 const SCEV *) const { 753 return 0; 754 } 755 getArithmeticReductionCost(unsigned,VectorType *,std::optional<FastMathFlags> FMF,TTI::TargetCostKind)756 InstructionCost getArithmeticReductionCost(unsigned, VectorType *, 757 std::optional<FastMathFlags> FMF, 758 TTI::TargetCostKind) const { 759 return 1; 760 } 761 getMinMaxReductionCost(Intrinsic::ID IID,VectorType *,FastMathFlags,TTI::TargetCostKind)762 InstructionCost getMinMaxReductionCost(Intrinsic::ID IID, VectorType *, 763 FastMathFlags, 764 TTI::TargetCostKind) const { 765 return 1; 766 } 767 getExtendedReductionCost(unsigned Opcode,bool IsUnsigned,Type * ResTy,VectorType * Ty,FastMathFlags FMF,TTI::TargetCostKind CostKind)768 InstructionCost getExtendedReductionCost(unsigned Opcode, bool IsUnsigned, 769 Type *ResTy, VectorType *Ty, 770 FastMathFlags FMF, 771 TTI::TargetCostKind CostKind) const { 772 return 1; 773 } 774 getMulAccReductionCost(bool IsUnsigned,Type * ResTy,VectorType * Ty,TTI::TargetCostKind CostKind)775 InstructionCost getMulAccReductionCost(bool IsUnsigned, Type *ResTy, 776 VectorType *Ty, 777 TTI::TargetCostKind CostKind) const { 778 return 1; 779 } 780 getCostOfKeepingLiveOverCall(ArrayRef<Type * > Tys)781 InstructionCost getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const { 782 return 0; 783 } 784 getTgtMemIntrinsic(IntrinsicInst * Inst,MemIntrinsicInfo & Info)785 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const { 786 return false; 787 } 788 getAtomicMemIntrinsicMaxElementSize()789 unsigned getAtomicMemIntrinsicMaxElementSize() const { 790 // Note for overrides: You must ensure for all element unordered-atomic 791 // memory intrinsics that all power-of-2 element sizes up to, and 792 // including, the return value of this method have a corresponding 793 // runtime lib call. These runtime lib call definitions can be found 794 // in RuntimeLibcalls.h 795 return 0; 796 } 797 getOrCreateResultFromMemIntrinsic(IntrinsicInst * Inst,Type * ExpectedType)798 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst, 799 Type *ExpectedType) const { 800 return nullptr; 801 } 802 803 Type * getMemcpyLoopLoweringType(LLVMContext & Context,Value * Length,unsigned SrcAddrSpace,unsigned DestAddrSpace,unsigned SrcAlign,unsigned DestAlign,std::optional<uint32_t> AtomicElementSize)804 getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length, 805 unsigned SrcAddrSpace, unsigned DestAddrSpace, 806 unsigned SrcAlign, unsigned DestAlign, 807 std::optional<uint32_t> AtomicElementSize) const { 808 return AtomicElementSize ? Type::getIntNTy(Context, *AtomicElementSize * 8) 809 : Type::getInt8Ty(Context); 810 } 811 getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type * > & OpsOut,LLVMContext & Context,unsigned RemainingBytes,unsigned SrcAddrSpace,unsigned DestAddrSpace,unsigned SrcAlign,unsigned DestAlign,std::optional<uint32_t> AtomicCpySize)812 void getMemcpyLoopResidualLoweringType( 813 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context, 814 unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace, 815 unsigned SrcAlign, unsigned DestAlign, 816 std::optional<uint32_t> AtomicCpySize) const { 817 unsigned OpSizeInBytes = AtomicCpySize ? *AtomicCpySize : 1; 818 Type *OpType = Type::getIntNTy(Context, OpSizeInBytes * 8); 819 for (unsigned i = 0; i != RemainingBytes; i += OpSizeInBytes) 820 OpsOut.push_back(OpType); 821 } 822 areInlineCompatible(const Function * Caller,const Function * Callee)823 bool areInlineCompatible(const Function *Caller, 824 const Function *Callee) const { 825 return (Caller->getFnAttribute("target-cpu") == 826 Callee->getFnAttribute("target-cpu")) && 827 (Caller->getFnAttribute("target-features") == 828 Callee->getFnAttribute("target-features")); 829 } 830 getInlineCallPenalty(const Function * F,const CallBase & Call,unsigned DefaultCallPenalty)831 unsigned getInlineCallPenalty(const Function *F, const CallBase &Call, 832 unsigned DefaultCallPenalty) const { 833 return DefaultCallPenalty; 834 } 835 areTypesABICompatible(const Function * Caller,const Function * Callee,const ArrayRef<Type * > & Types)836 bool areTypesABICompatible(const Function *Caller, const Function *Callee, 837 const ArrayRef<Type *> &Types) const { 838 return (Caller->getFnAttribute("target-cpu") == 839 Callee->getFnAttribute("target-cpu")) && 840 (Caller->getFnAttribute("target-features") == 841 Callee->getFnAttribute("target-features")); 842 } 843 isIndexedLoadLegal(TTI::MemIndexedMode Mode,Type * Ty,const DataLayout & DL)844 bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty, 845 const DataLayout &DL) const { 846 return false; 847 } 848 isIndexedStoreLegal(TTI::MemIndexedMode Mode,Type * Ty,const DataLayout & DL)849 bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty, 850 const DataLayout &DL) const { 851 return false; 852 } 853 getLoadStoreVecRegBitWidth(unsigned AddrSpace)854 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; } 855 isLegalToVectorizeLoad(LoadInst * LI)856 bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; } 857 isLegalToVectorizeStore(StoreInst * SI)858 bool isLegalToVectorizeStore(StoreInst *SI) const { return true; } 859 isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,Align Alignment,unsigned AddrSpace)860 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment, 861 unsigned AddrSpace) const { 862 return true; 863 } 864 isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,Align Alignment,unsigned AddrSpace)865 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment, 866 unsigned AddrSpace) const { 867 return true; 868 } 869 isLegalToVectorizeReduction(const RecurrenceDescriptor & RdxDesc,ElementCount VF)870 bool isLegalToVectorizeReduction(const RecurrenceDescriptor &RdxDesc, 871 ElementCount VF) const { 872 return true; 873 } 874 isElementTypeLegalForScalableVector(Type * Ty)875 bool isElementTypeLegalForScalableVector(Type *Ty) const { return true; } 876 getLoadVectorFactor(unsigned VF,unsigned LoadSize,unsigned ChainSizeInBytes,VectorType * VecTy)877 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize, 878 unsigned ChainSizeInBytes, 879 VectorType *VecTy) const { 880 return VF; 881 } 882 getStoreVectorFactor(unsigned VF,unsigned StoreSize,unsigned ChainSizeInBytes,VectorType * VecTy)883 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize, 884 unsigned ChainSizeInBytes, 885 VectorType *VecTy) const { 886 return VF; 887 } 888 preferInLoopReduction(unsigned Opcode,Type * Ty,TTI::ReductionFlags Flags)889 bool preferInLoopReduction(unsigned Opcode, Type *Ty, 890 TTI::ReductionFlags Flags) const { 891 return false; 892 } 893 preferPredicatedReductionSelect(unsigned Opcode,Type * Ty,TTI::ReductionFlags Flags)894 bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty, 895 TTI::ReductionFlags Flags) const { 896 return false; 897 } 898 preferEpilogueVectorization()899 bool preferEpilogueVectorization() const { 900 return true; 901 } 902 shouldExpandReduction(const IntrinsicInst * II)903 bool shouldExpandReduction(const IntrinsicInst *II) const { return true; } 904 getGISelRematGlobalCost()905 unsigned getGISelRematGlobalCost() const { return 1; } 906 getMinTripCountTailFoldingThreshold()907 unsigned getMinTripCountTailFoldingThreshold() const { return 0; } 908 supportsScalableVectors()909 bool supportsScalableVectors() const { return false; } 910 enableScalableVectorization()911 bool enableScalableVectorization() const { return false; } 912 hasActiveVectorLength(unsigned Opcode,Type * DataType,Align Alignment)913 bool hasActiveVectorLength(unsigned Opcode, Type *DataType, 914 Align Alignment) const { 915 return false; 916 } 917 918 TargetTransformInfo::VPLegalization getVPLegalizationStrategy(const VPIntrinsic & PI)919 getVPLegalizationStrategy(const VPIntrinsic &PI) const { 920 return TargetTransformInfo::VPLegalization( 921 /* EVLParamStrategy */ TargetTransformInfo::VPLegalization::Discard, 922 /* OperatorStrategy */ TargetTransformInfo::VPLegalization::Convert); 923 } 924 hasArmWideBranch(bool)925 bool hasArmWideBranch(bool) const { return false; } 926 getMaxNumArgs()927 unsigned getMaxNumArgs() const { return UINT_MAX; } 928 929 protected: 930 // Obtain the minimum required size to hold the value (without the sign) 931 // In case of a vector it returns the min required size for one element. minRequiredElementSize(const Value * Val,bool & isSigned)932 unsigned minRequiredElementSize(const Value *Val, bool &isSigned) const { 933 if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) { 934 const auto *VectorValue = cast<Constant>(Val); 935 936 // In case of a vector need to pick the max between the min 937 // required size for each element 938 auto *VT = cast<FixedVectorType>(Val->getType()); 939 940 // Assume unsigned elements 941 isSigned = false; 942 943 // The max required size is the size of the vector element type 944 unsigned MaxRequiredSize = 945 VT->getElementType()->getPrimitiveSizeInBits().getFixedValue(); 946 947 unsigned MinRequiredSize = 0; 948 for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) { 949 if (auto *IntElement = 950 dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) { 951 bool signedElement = IntElement->getValue().isNegative(); 952 // Get the element min required size. 953 unsigned ElementMinRequiredSize = 954 IntElement->getValue().getSignificantBits() - 1; 955 // In case one element is signed then all the vector is signed. 956 isSigned |= signedElement; 957 // Save the max required bit size between all the elements. 958 MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize); 959 } else { 960 // not an int constant element 961 return MaxRequiredSize; 962 } 963 } 964 return MinRequiredSize; 965 } 966 967 if (const auto *CI = dyn_cast<ConstantInt>(Val)) { 968 isSigned = CI->getValue().isNegative(); 969 return CI->getValue().getSignificantBits() - 1; 970 } 971 972 if (const auto *Cast = dyn_cast<SExtInst>(Val)) { 973 isSigned = true; 974 return Cast->getSrcTy()->getScalarSizeInBits() - 1; 975 } 976 977 if (const auto *Cast = dyn_cast<ZExtInst>(Val)) { 978 isSigned = false; 979 return Cast->getSrcTy()->getScalarSizeInBits(); 980 } 981 982 isSigned = false; 983 return Val->getType()->getScalarSizeInBits(); 984 } 985 isStridedAccess(const SCEV * Ptr)986 bool isStridedAccess(const SCEV *Ptr) const { 987 return Ptr && isa<SCEVAddRecExpr>(Ptr); 988 } 989 getConstantStrideStep(ScalarEvolution * SE,const SCEV * Ptr)990 const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE, 991 const SCEV *Ptr) const { 992 if (!isStridedAccess(Ptr)) 993 return nullptr; 994 const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr); 995 return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE)); 996 } 997 isConstantStridedAccessLessThan(ScalarEvolution * SE,const SCEV * Ptr,int64_t MergeDistance)998 bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr, 999 int64_t MergeDistance) const { 1000 const SCEVConstant *Step = getConstantStrideStep(SE, Ptr); 1001 if (!Step) 1002 return false; 1003 APInt StrideVal = Step->getAPInt(); 1004 if (StrideVal.getBitWidth() > 64) 1005 return false; 1006 // FIXME: Need to take absolute value for negative stride case. 1007 return StrideVal.getSExtValue() < MergeDistance; 1008 } 1009 }; 1010 1011 /// CRTP base class for use as a mix-in that aids implementing 1012 /// a TargetTransformInfo-compatible class. 1013 template <typename T> 1014 class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase { 1015 private: 1016 typedef TargetTransformInfoImplBase BaseT; 1017 1018 protected: TargetTransformInfoImplCRTPBase(const DataLayout & DL)1019 explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {} 1020 1021 public: 1022 using BaseT::getGEPCost; 1023 getGEPCost(Type * PointeeType,const Value * Ptr,ArrayRef<const Value * > Operands,Type * AccessType,TTI::TargetCostKind CostKind)1024 InstructionCost getGEPCost(Type *PointeeType, const Value *Ptr, 1025 ArrayRef<const Value *> Operands, Type *AccessType, 1026 TTI::TargetCostKind CostKind) { 1027 assert(PointeeType && Ptr && "can't get GEPCost of nullptr"); 1028 auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts()); 1029 bool HasBaseReg = (BaseGV == nullptr); 1030 1031 auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType()); 1032 APInt BaseOffset(PtrSizeBits, 0); 1033 int64_t Scale = 0; 1034 1035 auto GTI = gep_type_begin(PointeeType, Operands); 1036 Type *TargetType = nullptr; 1037 1038 // Handle the case where the GEP instruction has a single operand, 1039 // the basis, therefore TargetType is a nullptr. 1040 if (Operands.empty()) 1041 return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic; 1042 1043 for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) { 1044 TargetType = GTI.getIndexedType(); 1045 // We assume that the cost of Scalar GEP with constant index and the 1046 // cost of Vector GEP with splat constant index are the same. 1047 const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I); 1048 if (!ConstIdx) 1049 if (auto Splat = getSplatValue(*I)) 1050 ConstIdx = dyn_cast<ConstantInt>(Splat); 1051 if (StructType *STy = GTI.getStructTypeOrNull()) { 1052 // For structures the index is always splat or scalar constant 1053 assert(ConstIdx && "Unexpected GEP index"); 1054 uint64_t Field = ConstIdx->getZExtValue(); 1055 BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field); 1056 } else { 1057 // If this operand is a scalable type, bail out early. 1058 // TODO: Make isLegalAddressingMode TypeSize aware. 1059 if (TargetType->isScalableTy()) 1060 return TTI::TCC_Basic; 1061 int64_t ElementSize = 1062 GTI.getSequentialElementStride(DL).getFixedValue(); 1063 if (ConstIdx) { 1064 BaseOffset += 1065 ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize; 1066 } else { 1067 // Needs scale register. 1068 if (Scale != 0) 1069 // No addressing mode takes two scale registers. 1070 return TTI::TCC_Basic; 1071 Scale = ElementSize; 1072 } 1073 } 1074 } 1075 1076 // If we haven't been provided a hint, use the target type for now. 1077 // 1078 // TODO: Take a look at potentially removing this: This is *slightly* wrong 1079 // as it's possible to have a GEP with a foldable target type but a memory 1080 // access that isn't foldable. For example, this load isn't foldable on 1081 // RISC-V: 1082 // 1083 // %p = getelementptr i32, ptr %base, i32 42 1084 // %x = load <2 x i32>, ptr %p 1085 if (!AccessType) 1086 AccessType = TargetType; 1087 1088 // If the final address of the GEP is a legal addressing mode for the given 1089 // access type, then we can fold it into its users. 1090 if (static_cast<T *>(this)->isLegalAddressingMode( 1091 AccessType, const_cast<GlobalValue *>(BaseGV), 1092 BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, 1093 Ptr->getType()->getPointerAddressSpace())) 1094 return TTI::TCC_Free; 1095 1096 // TODO: Instead of returning TCC_Basic here, we should use 1097 // getArithmeticInstrCost. Or better yet, provide a hook to let the target 1098 // model it. 1099 return TTI::TCC_Basic; 1100 } 1101 getPointersChainCost(ArrayRef<const Value * > Ptrs,const Value * Base,const TTI::PointersChainInfo & Info,Type * AccessTy,TTI::TargetCostKind CostKind)1102 InstructionCost getPointersChainCost(ArrayRef<const Value *> Ptrs, 1103 const Value *Base, 1104 const TTI::PointersChainInfo &Info, 1105 Type *AccessTy, 1106 TTI::TargetCostKind CostKind) { 1107 InstructionCost Cost = TTI::TCC_Free; 1108 // In the basic model we take into account GEP instructions only 1109 // (although here can come alloca instruction, a value, constants and/or 1110 // constant expressions, PHIs, bitcasts ... whatever allowed to be used as a 1111 // pointer). Typically, if Base is a not a GEP-instruction and all the 1112 // pointers are relative to the same base address, all the rest are 1113 // either GEP instructions, PHIs, bitcasts or constants. When we have same 1114 // base, we just calculate cost of each non-Base GEP as an ADD operation if 1115 // any their index is a non-const. 1116 // If no known dependecies between the pointers cost is calculated as a sum 1117 // of costs of GEP instructions. 1118 for (const Value *V : Ptrs) { 1119 const auto *GEP = dyn_cast<GetElementPtrInst>(V); 1120 if (!GEP) 1121 continue; 1122 if (Info.isSameBase() && V != Base) { 1123 if (GEP->hasAllConstantIndices()) 1124 continue; 1125 Cost += static_cast<T *>(this)->getArithmeticInstrCost( 1126 Instruction::Add, GEP->getType(), CostKind, 1127 {TTI::OK_AnyValue, TTI::OP_None}, {TTI::OK_AnyValue, TTI::OP_None}, 1128 std::nullopt); 1129 } else { 1130 SmallVector<const Value *> Indices(GEP->indices()); 1131 Cost += static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(), 1132 GEP->getPointerOperand(), 1133 Indices, AccessTy, CostKind); 1134 } 1135 } 1136 return Cost; 1137 } 1138 getInstructionCost(const User * U,ArrayRef<const Value * > Operands,TTI::TargetCostKind CostKind)1139 InstructionCost getInstructionCost(const User *U, 1140 ArrayRef<const Value *> Operands, 1141 TTI::TargetCostKind CostKind) { 1142 using namespace llvm::PatternMatch; 1143 1144 auto *TargetTTI = static_cast<T *>(this); 1145 // Handle non-intrinsic calls, invokes, and callbr. 1146 // FIXME: Unlikely to be true for anything but CodeSize. 1147 auto *CB = dyn_cast<CallBase>(U); 1148 if (CB && !isa<IntrinsicInst>(U)) { 1149 if (const Function *F = CB->getCalledFunction()) { 1150 if (!TargetTTI->isLoweredToCall(F)) 1151 return TTI::TCC_Basic; // Give a basic cost if it will be lowered 1152 1153 return TTI::TCC_Basic * (F->getFunctionType()->getNumParams() + 1); 1154 } 1155 // For indirect or other calls, scale cost by number of arguments. 1156 return TTI::TCC_Basic * (CB->arg_size() + 1); 1157 } 1158 1159 Type *Ty = U->getType(); 1160 unsigned Opcode = Operator::getOpcode(U); 1161 auto *I = dyn_cast<Instruction>(U); 1162 switch (Opcode) { 1163 default: 1164 break; 1165 case Instruction::Call: { 1166 assert(isa<IntrinsicInst>(U) && "Unexpected non-intrinsic call"); 1167 auto *Intrinsic = cast<IntrinsicInst>(U); 1168 IntrinsicCostAttributes CostAttrs(Intrinsic->getIntrinsicID(), *CB); 1169 return TargetTTI->getIntrinsicInstrCost(CostAttrs, CostKind); 1170 } 1171 case Instruction::Br: 1172 case Instruction::Ret: 1173 case Instruction::PHI: 1174 case Instruction::Switch: 1175 return TargetTTI->getCFInstrCost(Opcode, CostKind, I); 1176 case Instruction::ExtractValue: 1177 case Instruction::Freeze: 1178 return TTI::TCC_Free; 1179 case Instruction::Alloca: 1180 if (cast<AllocaInst>(U)->isStaticAlloca()) 1181 return TTI::TCC_Free; 1182 break; 1183 case Instruction::GetElementPtr: { 1184 const auto *GEP = cast<GEPOperator>(U); 1185 Type *AccessType = nullptr; 1186 // For now, only provide the AccessType in the simple case where the GEP 1187 // only has one user. 1188 if (GEP->hasOneUser() && I) 1189 AccessType = I->user_back()->getAccessType(); 1190 1191 return TargetTTI->getGEPCost(GEP->getSourceElementType(), 1192 Operands.front(), Operands.drop_front(), 1193 AccessType, CostKind); 1194 } 1195 case Instruction::Add: 1196 case Instruction::FAdd: 1197 case Instruction::Sub: 1198 case Instruction::FSub: 1199 case Instruction::Mul: 1200 case Instruction::FMul: 1201 case Instruction::UDiv: 1202 case Instruction::SDiv: 1203 case Instruction::FDiv: 1204 case Instruction::URem: 1205 case Instruction::SRem: 1206 case Instruction::FRem: 1207 case Instruction::Shl: 1208 case Instruction::LShr: 1209 case Instruction::AShr: 1210 case Instruction::And: 1211 case Instruction::Or: 1212 case Instruction::Xor: 1213 case Instruction::FNeg: { 1214 const TTI::OperandValueInfo Op1Info = TTI::getOperandInfo(Operands[0]); 1215 TTI::OperandValueInfo Op2Info; 1216 if (Opcode != Instruction::FNeg) 1217 Op2Info = TTI::getOperandInfo(Operands[1]); 1218 return TargetTTI->getArithmeticInstrCost(Opcode, Ty, CostKind, Op1Info, 1219 Op2Info, Operands, I); 1220 } 1221 case Instruction::IntToPtr: 1222 case Instruction::PtrToInt: 1223 case Instruction::SIToFP: 1224 case Instruction::UIToFP: 1225 case Instruction::FPToUI: 1226 case Instruction::FPToSI: 1227 case Instruction::Trunc: 1228 case Instruction::FPTrunc: 1229 case Instruction::BitCast: 1230 case Instruction::FPExt: 1231 case Instruction::SExt: 1232 case Instruction::ZExt: 1233 case Instruction::AddrSpaceCast: { 1234 Type *OpTy = Operands[0]->getType(); 1235 return TargetTTI->getCastInstrCost( 1236 Opcode, Ty, OpTy, TTI::getCastContextHint(I), CostKind, I); 1237 } 1238 case Instruction::Store: { 1239 auto *SI = cast<StoreInst>(U); 1240 Type *ValTy = Operands[0]->getType(); 1241 TTI::OperandValueInfo OpInfo = TTI::getOperandInfo(Operands[0]); 1242 return TargetTTI->getMemoryOpCost(Opcode, ValTy, SI->getAlign(), 1243 SI->getPointerAddressSpace(), CostKind, 1244 OpInfo, I); 1245 } 1246 case Instruction::Load: { 1247 // FIXME: Arbitary cost which could come from the backend. 1248 if (CostKind == TTI::TCK_Latency) 1249 return 4; 1250 auto *LI = cast<LoadInst>(U); 1251 Type *LoadType = U->getType(); 1252 // If there is a non-register sized type, the cost estimation may expand 1253 // it to be several instructions to load into multiple registers on the 1254 // target. But, if the only use of the load is a trunc instruction to a 1255 // register sized type, the instruction selector can combine these 1256 // instructions to be a single load. So, in this case, we use the 1257 // destination type of the trunc instruction rather than the load to 1258 // accurately estimate the cost of this load instruction. 1259 if (CostKind == TTI::TCK_CodeSize && LI->hasOneUse() && 1260 !LoadType->isVectorTy()) { 1261 if (const TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin())) 1262 LoadType = TI->getDestTy(); 1263 } 1264 return TargetTTI->getMemoryOpCost(Opcode, LoadType, LI->getAlign(), 1265 LI->getPointerAddressSpace(), CostKind, 1266 {TTI::OK_AnyValue, TTI::OP_None}, I); 1267 } 1268 case Instruction::Select: { 1269 const Value *Op0, *Op1; 1270 if (match(U, m_LogicalAnd(m_Value(Op0), m_Value(Op1))) || 1271 match(U, m_LogicalOr(m_Value(Op0), m_Value(Op1)))) { 1272 // select x, y, false --> x & y 1273 // select x, true, y --> x | y 1274 const auto Op1Info = TTI::getOperandInfo(Op0); 1275 const auto Op2Info = TTI::getOperandInfo(Op1); 1276 assert(Op0->getType()->getScalarSizeInBits() == 1 && 1277 Op1->getType()->getScalarSizeInBits() == 1); 1278 1279 SmallVector<const Value *, 2> Operands{Op0, Op1}; 1280 return TargetTTI->getArithmeticInstrCost( 1281 match(U, m_LogicalOr()) ? Instruction::Or : Instruction::And, Ty, 1282 CostKind, Op1Info, Op2Info, Operands, I); 1283 } 1284 Type *CondTy = Operands[0]->getType(); 1285 return TargetTTI->getCmpSelInstrCost(Opcode, U->getType(), CondTy, 1286 CmpInst::BAD_ICMP_PREDICATE, 1287 CostKind, I); 1288 } 1289 case Instruction::ICmp: 1290 case Instruction::FCmp: { 1291 Type *ValTy = Operands[0]->getType(); 1292 // TODO: Also handle ICmp/FCmp constant expressions. 1293 return TargetTTI->getCmpSelInstrCost(Opcode, ValTy, U->getType(), 1294 I ? cast<CmpInst>(I)->getPredicate() 1295 : CmpInst::BAD_ICMP_PREDICATE, 1296 CostKind, I); 1297 } 1298 case Instruction::InsertElement: { 1299 auto *IE = dyn_cast<InsertElementInst>(U); 1300 if (!IE) 1301 return TTI::TCC_Basic; // FIXME 1302 unsigned Idx = -1; 1303 if (auto *CI = dyn_cast<ConstantInt>(Operands[2])) 1304 if (CI->getValue().getActiveBits() <= 32) 1305 Idx = CI->getZExtValue(); 1306 return TargetTTI->getVectorInstrCost(*IE, Ty, CostKind, Idx); 1307 } 1308 case Instruction::ShuffleVector: { 1309 auto *Shuffle = dyn_cast<ShuffleVectorInst>(U); 1310 if (!Shuffle) 1311 return TTI::TCC_Basic; // FIXME 1312 1313 auto *VecTy = cast<VectorType>(U->getType()); 1314 auto *VecSrcTy = cast<VectorType>(Operands[0]->getType()); 1315 int NumSubElts, SubIndex; 1316 1317 if (Shuffle->changesLength()) { 1318 // Treat a 'subvector widening' as a free shuffle. 1319 if (Shuffle->increasesLength() && Shuffle->isIdentityWithPadding()) 1320 return 0; 1321 1322 if (Shuffle->isExtractSubvectorMask(SubIndex)) 1323 return TargetTTI->getShuffleCost(TTI::SK_ExtractSubvector, VecSrcTy, 1324 Shuffle->getShuffleMask(), CostKind, 1325 SubIndex, VecTy, Operands); 1326 1327 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) 1328 return TargetTTI->getShuffleCost( 1329 TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), 1330 CostKind, SubIndex, 1331 FixedVectorType::get(VecTy->getScalarType(), NumSubElts), 1332 Operands); 1333 1334 int ReplicationFactor, VF; 1335 if (Shuffle->isReplicationMask(ReplicationFactor, VF)) { 1336 APInt DemandedDstElts = 1337 APInt::getZero(Shuffle->getShuffleMask().size()); 1338 for (auto I : enumerate(Shuffle->getShuffleMask())) { 1339 if (I.value() != PoisonMaskElem) 1340 DemandedDstElts.setBit(I.index()); 1341 } 1342 return TargetTTI->getReplicationShuffleCost( 1343 VecSrcTy->getElementType(), ReplicationFactor, VF, 1344 DemandedDstElts, CostKind); 1345 } 1346 1347 return CostKind == TTI::TCK_RecipThroughput ? -1 : 1; 1348 } 1349 1350 if (Shuffle->isIdentity()) 1351 return 0; 1352 1353 if (Shuffle->isReverse()) 1354 return TargetTTI->getShuffleCost(TTI::SK_Reverse, VecTy, 1355 Shuffle->getShuffleMask(), CostKind, 0, 1356 nullptr, Operands); 1357 1358 if (Shuffle->isSelect()) 1359 return TargetTTI->getShuffleCost(TTI::SK_Select, VecTy, 1360 Shuffle->getShuffleMask(), CostKind, 0, 1361 nullptr, Operands); 1362 1363 if (Shuffle->isTranspose()) 1364 return TargetTTI->getShuffleCost(TTI::SK_Transpose, VecTy, 1365 Shuffle->getShuffleMask(), CostKind, 0, 1366 nullptr, Operands); 1367 1368 if (Shuffle->isZeroEltSplat()) 1369 return TargetTTI->getShuffleCost(TTI::SK_Broadcast, VecTy, 1370 Shuffle->getShuffleMask(), CostKind, 0, 1371 nullptr, Operands); 1372 1373 if (Shuffle->isSingleSource()) 1374 return TargetTTI->getShuffleCost(TTI::SK_PermuteSingleSrc, VecTy, 1375 Shuffle->getShuffleMask(), CostKind, 0, 1376 nullptr, Operands); 1377 1378 if (Shuffle->isInsertSubvectorMask(NumSubElts, SubIndex)) 1379 return TargetTTI->getShuffleCost( 1380 TTI::SK_InsertSubvector, VecTy, Shuffle->getShuffleMask(), CostKind, 1381 SubIndex, FixedVectorType::get(VecTy->getScalarType(), NumSubElts), 1382 Operands); 1383 1384 if (Shuffle->isSplice(SubIndex)) 1385 return TargetTTI->getShuffleCost(TTI::SK_Splice, VecTy, 1386 Shuffle->getShuffleMask(), CostKind, 1387 SubIndex, nullptr, Operands); 1388 1389 return TargetTTI->getShuffleCost(TTI::SK_PermuteTwoSrc, VecTy, 1390 Shuffle->getShuffleMask(), CostKind, 0, 1391 nullptr, Operands); 1392 } 1393 case Instruction::ExtractElement: { 1394 auto *EEI = dyn_cast<ExtractElementInst>(U); 1395 if (!EEI) 1396 return TTI::TCC_Basic; // FIXME 1397 unsigned Idx = -1; 1398 if (auto *CI = dyn_cast<ConstantInt>(Operands[1])) 1399 if (CI->getValue().getActiveBits() <= 32) 1400 Idx = CI->getZExtValue(); 1401 Type *DstTy = Operands[0]->getType(); 1402 return TargetTTI->getVectorInstrCost(*EEI, DstTy, CostKind, Idx); 1403 } 1404 } 1405 1406 // By default, just classify everything as 'basic' or -1 to represent that 1407 // don't know the throughput cost. 1408 return CostKind == TTI::TCK_RecipThroughput ? -1 : TTI::TCC_Basic; 1409 } 1410 isExpensiveToSpeculativelyExecute(const Instruction * I)1411 bool isExpensiveToSpeculativelyExecute(const Instruction *I) { 1412 auto *TargetTTI = static_cast<T *>(this); 1413 SmallVector<const Value *, 4> Ops(I->operand_values()); 1414 InstructionCost Cost = TargetTTI->getInstructionCost( 1415 I, Ops, TargetTransformInfo::TCK_SizeAndLatency); 1416 return Cost >= TargetTransformInfo::TCC_Expensive; 1417 } 1418 }; 1419 } // namespace llvm 1420 1421 #endif 1422