1 //===-- PPCTargetTransformInfo.cpp - PPC specific TTI pass ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This file implements a TargetTransformInfo analysis pass specific to the 11 /// PPC target machine. It uses the target's detailed information to provide 12 /// more precise answers to certain TTI queries, while letting the target 13 /// independent and default TTI implementations handle the rest. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #define DEBUG_TYPE "ppctti" 18 #include "PPC.h" 19 #include "PPCTargetMachine.h" 20 #include "llvm/Analysis/TargetTransformInfo.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Target/CostTable.h" 23 #include "llvm/Target/TargetLowering.h" 24 using namespace llvm; 25 26 // Declare the pass initialization routine locally as target-specific passes 27 // don't havve a target-wide initialization entry point, and so we rely on the 28 // pass constructor initialization. 29 namespace llvm { 30 void initializePPCTTIPass(PassRegistry &); 31 } 32 33 namespace { 34 35 class PPCTTI LLVM_FINAL : public ImmutablePass, public TargetTransformInfo { 36 const PPCTargetMachine *TM; 37 const PPCSubtarget *ST; 38 const PPCTargetLowering *TLI; 39 40 /// Estimate the overhead of scalarizing an instruction. Insert and Extract 41 /// are set if the result needs to be inserted and/or extracted from vectors. 42 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; 43 44 public: 45 PPCTTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { 46 llvm_unreachable("This pass cannot be directly constructed"); 47 } 48 49 PPCTTI(const PPCTargetMachine *TM) 50 : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), 51 TLI(TM->getTargetLowering()) { 52 initializePPCTTIPass(*PassRegistry::getPassRegistry()); 53 } 54 55 virtual void initializePass() LLVM_OVERRIDE { 56 pushTTIStack(this); 57 } 58 59 virtual void finalizePass() { 60 popTTIStack(); 61 } 62 63 virtual void getAnalysisUsage(AnalysisUsage &AU) const LLVM_OVERRIDE { 64 TargetTransformInfo::getAnalysisUsage(AU); 65 } 66 67 /// Pass identification. 68 static char ID; 69 70 /// Provide necessary pointer adjustments for the two base classes. 71 virtual void *getAdjustedAnalysisPointer(const void *ID) LLVM_OVERRIDE { 72 if (ID == &TargetTransformInfo::ID) 73 return (TargetTransformInfo*)this; 74 return this; 75 } 76 77 /// \name Scalar TTI Implementations 78 /// @{ 79 virtual PopcntSupportKind 80 getPopcntSupport(unsigned TyWidth) const LLVM_OVERRIDE; 81 virtual void getUnrollingPreferences( 82 Loop *L, UnrollingPreferences &UP) const LLVM_OVERRIDE; 83 84 /// @} 85 86 /// \name Vector TTI Implementations 87 /// @{ 88 89 virtual unsigned getNumberOfRegisters(bool Vector) const LLVM_OVERRIDE; 90 virtual unsigned getRegisterBitWidth(bool Vector) const LLVM_OVERRIDE; 91 virtual unsigned getMaximumUnrollFactor() const LLVM_OVERRIDE; 92 virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, 93 OperandValueKind, 94 OperandValueKind) const LLVM_OVERRIDE; 95 virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, 96 int Index, Type *SubTp) const LLVM_OVERRIDE; 97 virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, 98 Type *Src) const LLVM_OVERRIDE; 99 virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 100 Type *CondTy) const LLVM_OVERRIDE; 101 virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, 102 unsigned Index) const LLVM_OVERRIDE; 103 virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, 104 unsigned Alignment, 105 unsigned AddressSpace) const LLVM_OVERRIDE; 106 107 /// @} 108 }; 109 110 } // end anonymous namespace 111 112 INITIALIZE_AG_PASS(PPCTTI, TargetTransformInfo, "ppctti", 113 "PPC Target Transform Info", true, true, false) 114 char PPCTTI::ID = 0; 115 116 ImmutablePass * 117 llvm::createPPCTargetTransformInfoPass(const PPCTargetMachine *TM) { 118 return new PPCTTI(TM); 119 } 120 121 122 //===----------------------------------------------------------------------===// 123 // 124 // PPC cost model. 125 // 126 //===----------------------------------------------------------------------===// 127 128 PPCTTI::PopcntSupportKind PPCTTI::getPopcntSupport(unsigned TyWidth) const { 129 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); 130 if (ST->hasPOPCNTD() && TyWidth <= 64) 131 return PSK_FastHardware; 132 return PSK_Software; 133 } 134 135 void PPCTTI::getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const { 136 if (ST->getDarwinDirective() == PPC::DIR_A2) { 137 // The A2 is in-order with a deep pipeline, and concatenation unrolling 138 // helps expose latency-hiding opportunities to the instruction scheduler. 139 UP.Partial = UP.Runtime = true; 140 } 141 } 142 143 unsigned PPCTTI::getNumberOfRegisters(bool Vector) const { 144 if (Vector && !ST->hasAltivec()) 145 return 0; 146 return 32; 147 } 148 149 unsigned PPCTTI::getRegisterBitWidth(bool Vector) const { 150 if (Vector) { 151 if (ST->hasAltivec()) return 128; 152 return 0; 153 } 154 155 if (ST->isPPC64()) 156 return 64; 157 return 32; 158 159 } 160 161 unsigned PPCTTI::getMaximumUnrollFactor() const { 162 unsigned Directive = ST->getDarwinDirective(); 163 // The 440 has no SIMD support, but floating-point instructions 164 // have a 5-cycle latency, so unroll by 5x for latency hiding. 165 if (Directive == PPC::DIR_440) 166 return 5; 167 168 // The A2 has no SIMD support, but floating-point instructions 169 // have a 6-cycle latency, so unroll by 6x for latency hiding. 170 if (Directive == PPC::DIR_A2) 171 return 6; 172 173 // FIXME: For lack of any better information, do no harm... 174 if (Directive == PPC::DIR_E500mc || Directive == PPC::DIR_E5500) 175 return 1; 176 177 // For most things, modern systems have two execution units (and 178 // out-of-order execution). 179 return 2; 180 } 181 182 unsigned PPCTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, 183 OperandValueKind Op1Info, 184 OperandValueKind Op2Info) const { 185 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); 186 187 // Fallback to the default implementation. 188 return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty, Op1Info, 189 Op2Info); 190 } 191 192 unsigned PPCTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, 193 Type *SubTp) const { 194 return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); 195 } 196 197 unsigned PPCTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { 198 assert(TLI->InstructionOpcodeToISD(Opcode) && "Invalid opcode"); 199 200 return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); 201 } 202 203 unsigned PPCTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 204 Type *CondTy) const { 205 return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); 206 } 207 208 unsigned PPCTTI::getVectorInstrCost(unsigned Opcode, Type *Val, 209 unsigned Index) const { 210 assert(Val->isVectorTy() && "This must be a vector type"); 211 212 int ISD = TLI->InstructionOpcodeToISD(Opcode); 213 assert(ISD && "Invalid opcode"); 214 215 // Estimated cost of a load-hit-store delay. This was obtained 216 // experimentally as a minimum needed to prevent unprofitable 217 // vectorization for the paq8p benchmark. It may need to be 218 // raised further if other unprofitable cases remain. 219 unsigned LHSPenalty = 12; 220 221 // Vector element insert/extract with Altivec is very expensive, 222 // because they require store and reload with the attendant 223 // processor stall for load-hit-store. Until VSX is available, 224 // these need to be estimated as very costly. 225 if (ISD == ISD::EXTRACT_VECTOR_ELT || 226 ISD == ISD::INSERT_VECTOR_ELT) 227 return LHSPenalty + 228 TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); 229 230 return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); 231 } 232 233 unsigned PPCTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 234 unsigned AddressSpace) const { 235 // Legalize the type. 236 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); 237 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && 238 "Invalid Opcode"); 239 240 // Each load/store unit costs 1. 241 unsigned Cost = LT.first * 1; 242 243 // PPC in general does not support unaligned loads and stores. They'll need 244 // to be decomposed based on the alignment factor. 245 unsigned SrcBytes = LT.second.getStoreSize(); 246 if (SrcBytes && Alignment && Alignment < SrcBytes) 247 Cost *= (SrcBytes/Alignment); 248 249 return Cost; 250 } 251 252