1 //===-- X86TargetTransformInfo.cpp - X86 specific TTI pass ----------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// \file 10 /// This file implements a TargetTransformInfo analysis pass specific to the 11 /// X86 target machine. It uses the target's detailed information to provide 12 /// more precise answers to certain TTI queries, while letting the target 13 /// independent and default TTI implementations handle the rest. 14 /// 15 //===----------------------------------------------------------------------===// 16 17 #define DEBUG_TYPE "x86tti" 18 #include "X86.h" 19 #include "X86TargetMachine.h" 20 #include "llvm/Analysis/TargetTransformInfo.h" 21 #include "llvm/Support/Debug.h" 22 #include "llvm/Target/TargetLowering.h" 23 #include "llvm/Target/CostTable.h" 24 using namespace llvm; 25 26 // Declare the pass initialization routine locally as target-specific passes 27 // don't havve a target-wide initialization entry point, and so we rely on the 28 // pass constructor initialization. 29 namespace llvm { 30 void initializeX86TTIPass(PassRegistry &); 31 } 32 33 namespace { 34 35 class X86TTI : public ImmutablePass, public TargetTransformInfo { 36 const X86TargetMachine *TM; 37 const X86Subtarget *ST; 38 const X86TargetLowering *TLI; 39 40 /// Estimate the overhead of scalarizing an instruction. Insert and Extract 41 /// are set if the result needs to be inserted and/or extracted from vectors. 42 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; 43 44 public: 45 X86TTI() : ImmutablePass(ID), TM(0), ST(0), TLI(0) { 46 llvm_unreachable("This pass cannot be directly constructed"); 47 } 48 49 X86TTI(const X86TargetMachine *TM) 50 : ImmutablePass(ID), TM(TM), ST(TM->getSubtargetImpl()), 51 TLI(TM->getTargetLowering()) { 52 initializeX86TTIPass(*PassRegistry::getPassRegistry()); 53 } 54 55 virtual void initializePass() { 56 pushTTIStack(this); 57 } 58 59 virtual void finalizePass() { 60 popTTIStack(); 61 } 62 63 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 64 TargetTransformInfo::getAnalysisUsage(AU); 65 } 66 67 /// Pass identification. 68 static char ID; 69 70 /// Provide necessary pointer adjustments for the two base classes. 71 virtual void *getAdjustedAnalysisPointer(const void *ID) { 72 if (ID == &TargetTransformInfo::ID) 73 return (TargetTransformInfo*)this; 74 return this; 75 } 76 77 /// \name Scalar TTI Implementations 78 /// @{ 79 virtual PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; 80 81 /// @} 82 83 /// \name Vector TTI Implementations 84 /// @{ 85 86 virtual unsigned getNumberOfRegisters(bool Vector) const; 87 virtual unsigned getRegisterBitWidth(bool Vector) const; 88 virtual unsigned getMaximumUnrollFactor() const; 89 virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty) const; 90 virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, 91 int Index, Type *SubTp) const; 92 virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, 93 Type *Src) const; 94 virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 95 Type *CondTy) const; 96 virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, 97 unsigned Index) const; 98 virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, 99 unsigned Alignment, 100 unsigned AddressSpace) const; 101 102 /// @} 103 }; 104 105 } // end anonymous namespace 106 107 INITIALIZE_AG_PASS(X86TTI, TargetTransformInfo, "x86tti", 108 "X86 Target Transform Info", true, true, false) 109 char X86TTI::ID = 0; 110 111 ImmutablePass * 112 llvm::createX86TargetTransformInfoPass(const X86TargetMachine *TM) { 113 return new X86TTI(TM); 114 } 115 116 117 //===----------------------------------------------------------------------===// 118 // 119 // X86 cost model. 120 // 121 //===----------------------------------------------------------------------===// 122 123 X86TTI::PopcntSupportKind X86TTI::getPopcntSupport(unsigned TyWidth) const { 124 assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); 125 // TODO: Currently the __builtin_popcount() implementation using SSE3 126 // instructions is inefficient. Once the problem is fixed, we should 127 // call ST->hasSSE3() instead of ST->hasSSE4(). 128 return ST->hasSSE41() ? PSK_FastHardware : PSK_Software; 129 } 130 131 unsigned X86TTI::getNumberOfRegisters(bool Vector) const { 132 if (Vector && !ST->hasSSE1()) 133 return 0; 134 135 if (ST->is64Bit()) 136 return 16; 137 return 8; 138 } 139 140 unsigned X86TTI::getRegisterBitWidth(bool Vector) const { 141 if (Vector) { 142 if (ST->hasAVX()) return 256; 143 if (ST->hasSSE1()) return 128; 144 return 0; 145 } 146 147 if (ST->is64Bit()) 148 return 64; 149 return 32; 150 151 } 152 153 unsigned X86TTI::getMaximumUnrollFactor() const { 154 if (ST->isAtom()) 155 return 1; 156 157 // Sandybridge and Haswell have multiple execution ports and pipelined 158 // vector units. 159 if (ST->hasAVX()) 160 return 4; 161 162 return 2; 163 } 164 165 unsigned X86TTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty) const { 166 // Legalize the type. 167 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Ty); 168 169 int ISD = TLI->InstructionOpcodeToISD(Opcode); 170 assert(ISD && "Invalid opcode"); 171 172 static const CostTblEntry<MVT> AVX2CostTable[] = { 173 // Shifts on v4i64/v8i32 on AVX2 is legal even though we declare to 174 // customize them to detect the cases where shift amount is a scalar one. 175 { ISD::SHL, MVT::v4i32, 1 }, 176 { ISD::SRL, MVT::v4i32, 1 }, 177 { ISD::SRA, MVT::v4i32, 1 }, 178 { ISD::SHL, MVT::v8i32, 1 }, 179 { ISD::SRL, MVT::v8i32, 1 }, 180 { ISD::SRA, MVT::v8i32, 1 }, 181 { ISD::SHL, MVT::v2i64, 1 }, 182 { ISD::SRL, MVT::v2i64, 1 }, 183 { ISD::SHL, MVT::v4i64, 1 }, 184 { ISD::SRL, MVT::v4i64, 1 }, 185 }; 186 187 // Look for AVX2 lowering tricks. 188 if (ST->hasAVX2()) { 189 int Idx = CostTableLookup<MVT>(AVX2CostTable, array_lengthof(AVX2CostTable), 190 ISD, LT.second); 191 if (Idx != -1) 192 return LT.first * AVX2CostTable[Idx].Cost; 193 } 194 195 static const CostTblEntry<MVT> AVX1CostTable[] = { 196 // We don't have to scalarize unsupported ops. We can issue two half-sized 197 // operations and we only need to extract the upper YMM half. 198 // Two ops + 1 extract + 1 insert = 4. 199 { ISD::MUL, MVT::v8i32, 4 }, 200 { ISD::SUB, MVT::v8i32, 4 }, 201 { ISD::ADD, MVT::v8i32, 4 }, 202 { ISD::SUB, MVT::v4i64, 4 }, 203 { ISD::ADD, MVT::v4i64, 4 }, 204 // A v4i64 multiply is custom lowered as two split v2i64 vectors that then 205 // are lowered as a series of long multiplies(3), shifts(4) and adds(2) 206 // Because we believe v4i64 to be a legal type, we must also include the 207 // split factor of two in the cost table. Therefore, the cost here is 18 208 // instead of 9. 209 { ISD::MUL, MVT::v4i64, 18 }, 210 }; 211 212 // Look for AVX1 lowering tricks. 213 if (ST->hasAVX() && !ST->hasAVX2()) { 214 int Idx = CostTableLookup<MVT>(AVX1CostTable, array_lengthof(AVX1CostTable), 215 ISD, LT.second); 216 if (Idx != -1) 217 return LT.first * AVX1CostTable[Idx].Cost; 218 } 219 220 // Custom lowering of vectors. 221 static const CostTblEntry<MVT> CustomLowered[] = { 222 // A v2i64/v4i64 and multiply is custom lowered as a series of long 223 // multiplies(3), shifts(4) and adds(2). 224 { ISD::MUL, MVT::v2i64, 9 }, 225 { ISD::MUL, MVT::v4i64, 9 }, 226 }; 227 int Idx = CostTableLookup<MVT>(CustomLowered, array_lengthof(CustomLowered), 228 ISD, LT.second); 229 if (Idx != -1) 230 return LT.first * CustomLowered[Idx].Cost; 231 232 // Special lowering of v4i32 mul on sse2, sse3: Lower v4i32 mul as 2x shuffle, 233 // 2x pmuludq, 2x shuffle. 234 if (ISD == ISD::MUL && LT.second == MVT::v4i32 && ST->hasSSE2() && 235 !ST->hasSSE41()) 236 return 6; 237 238 // Fallback to the default implementation. 239 return TargetTransformInfo::getArithmeticInstrCost(Opcode, Ty); 240 } 241 242 unsigned X86TTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, 243 Type *SubTp) const { 244 // We only estimate the cost of reverse shuffles. 245 if (Kind != SK_Reverse) 246 return TargetTransformInfo::getShuffleCost(Kind, Tp, Index, SubTp); 247 248 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Tp); 249 unsigned Cost = 1; 250 if (LT.second.getSizeInBits() > 128) 251 Cost = 3; // Extract + insert + copy. 252 253 // Multiple by the number of parts. 254 return Cost * LT.first; 255 } 256 257 unsigned X86TTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { 258 int ISD = TLI->InstructionOpcodeToISD(Opcode); 259 assert(ISD && "Invalid opcode"); 260 261 EVT SrcTy = TLI->getValueType(Src); 262 EVT DstTy = TLI->getValueType(Dst); 263 264 if (!SrcTy.isSimple() || !DstTy.isSimple()) 265 return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); 266 267 static const TypeConversionCostTblEntry<MVT> AVXConversionTbl[] = { 268 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, 269 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 1 }, 270 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, 271 { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i32, 1 }, 272 { ISD::TRUNCATE, MVT::v4i32, MVT::v4i64, 1 }, 273 { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 1 }, 274 { ISD::SINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, 275 { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, 276 { ISD::UINT_TO_FP, MVT::v8f32, MVT::v8i8, 1 }, 277 { ISD::UINT_TO_FP, MVT::v4f32, MVT::v4i8, 1 }, 278 { ISD::FP_TO_SINT, MVT::v8i8, MVT::v8f32, 1 }, 279 { ISD::FP_TO_SINT, MVT::v4i8, MVT::v4f32, 1 }, 280 { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 6 }, 281 { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 9 }, 282 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 8 }, 283 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i8, 6 }, 284 { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i16, 6 }, 285 { ISD::TRUNCATE, MVT::v8i32, MVT::v8i64, 3 }, 286 }; 287 288 if (ST->hasAVX()) { 289 int Idx = ConvertCostTableLookup<MVT>(AVXConversionTbl, 290 array_lengthof(AVXConversionTbl), 291 ISD, DstTy.getSimpleVT(), SrcTy.getSimpleVT()); 292 if (Idx != -1) 293 return AVXConversionTbl[Idx].Cost; 294 } 295 296 return TargetTransformInfo::getCastInstrCost(Opcode, Dst, Src); 297 } 298 299 unsigned X86TTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, 300 Type *CondTy) const { 301 // Legalize the type. 302 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(ValTy); 303 304 MVT MTy = LT.second; 305 306 int ISD = TLI->InstructionOpcodeToISD(Opcode); 307 assert(ISD && "Invalid opcode"); 308 309 static const CostTblEntry<MVT> SSE42CostTbl[] = { 310 { ISD::SETCC, MVT::v2f64, 1 }, 311 { ISD::SETCC, MVT::v4f32, 1 }, 312 { ISD::SETCC, MVT::v2i64, 1 }, 313 { ISD::SETCC, MVT::v4i32, 1 }, 314 { ISD::SETCC, MVT::v8i16, 1 }, 315 { ISD::SETCC, MVT::v16i8, 1 }, 316 }; 317 318 static const CostTblEntry<MVT> AVX1CostTbl[] = { 319 { ISD::SETCC, MVT::v4f64, 1 }, 320 { ISD::SETCC, MVT::v8f32, 1 }, 321 // AVX1 does not support 8-wide integer compare. 322 { ISD::SETCC, MVT::v4i64, 4 }, 323 { ISD::SETCC, MVT::v8i32, 4 }, 324 { ISD::SETCC, MVT::v16i16, 4 }, 325 { ISD::SETCC, MVT::v32i8, 4 }, 326 }; 327 328 static const CostTblEntry<MVT> AVX2CostTbl[] = { 329 { ISD::SETCC, MVT::v4i64, 1 }, 330 { ISD::SETCC, MVT::v8i32, 1 }, 331 { ISD::SETCC, MVT::v16i16, 1 }, 332 { ISD::SETCC, MVT::v32i8, 1 }, 333 }; 334 335 if (ST->hasAVX2()) { 336 int Idx = CostTableLookup<MVT>(AVX2CostTbl, array_lengthof(AVX2CostTbl), ISD, MTy); 337 if (Idx != -1) 338 return LT.first * AVX2CostTbl[Idx].Cost; 339 } 340 341 if (ST->hasAVX()) { 342 int Idx = CostTableLookup<MVT>(AVX1CostTbl, array_lengthof(AVX1CostTbl), ISD, MTy); 343 if (Idx != -1) 344 return LT.first * AVX1CostTbl[Idx].Cost; 345 } 346 347 if (ST->hasSSE42()) { 348 int Idx = CostTableLookup<MVT>(SSE42CostTbl, array_lengthof(SSE42CostTbl), ISD, MTy); 349 if (Idx != -1) 350 return LT.first * SSE42CostTbl[Idx].Cost; 351 } 352 353 return TargetTransformInfo::getCmpSelInstrCost(Opcode, ValTy, CondTy); 354 } 355 356 unsigned X86TTI::getVectorInstrCost(unsigned Opcode, Type *Val, 357 unsigned Index) const { 358 assert(Val->isVectorTy() && "This must be a vector type"); 359 360 if (Index != -1U) { 361 // Legalize the type. 362 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Val); 363 364 // This type is legalized to a scalar type. 365 if (!LT.second.isVector()) 366 return 0; 367 368 // The type may be split. Normalize the index to the new type. 369 unsigned Width = LT.second.getVectorNumElements(); 370 Index = Index % Width; 371 372 // Floating point scalars are already located in index #0. 373 if (Val->getScalarType()->isFloatingPointTy() && Index == 0) 374 return 0; 375 } 376 377 return TargetTransformInfo::getVectorInstrCost(Opcode, Val, Index); 378 } 379 380 unsigned X86TTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, 381 unsigned AddressSpace) const { 382 // Legalize the type. 383 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(Src); 384 assert((Opcode == Instruction::Load || Opcode == Instruction::Store) && 385 "Invalid Opcode"); 386 387 // Each load/store unit costs 1. 388 unsigned Cost = LT.first * 1; 389 390 // On Sandybridge 256bit load/stores are double pumped 391 // (but not on Haswell). 392 if (LT.second.getSizeInBits() > 128 && !ST->hasAVX2()) 393 Cost*=2; 394 395 return Cost; 396 } 397