1 //===- CostModel.cpp ------ Cost Model Analysis ---------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the cost model analysis. It provides a very basic cost 11 // estimation for LLVM-IR. This analysis uses the services of the codegen 12 // to approximate the cost of any IR instruction when lowered to machine 13 // instructions. The cost results are unit-less and the cost number represents 14 // the throughput of the machine assuming that all loads hit the cache, all 15 // branches are predicted, etc. The cost numbers can be added in order to 16 // compare two or more transformation alternatives. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #define CM_NAME "cost-model" 21 #define DEBUG_TYPE CM_NAME 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/Analysis/Passes.h" 24 #include "llvm/Analysis/TargetTransformInfo.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/IR/Instructions.h" 27 #include "llvm/IR/IntrinsicInst.h" 28 #include "llvm/IR/Value.h" 29 #include "llvm/Pass.h" 30 #include "llvm/Support/CommandLine.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Support/raw_ostream.h" 33 using namespace llvm; 34 35 static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), 36 cl::Hidden, 37 cl::desc("Recognize reduction patterns.")); 38 39 namespace { 40 class CostModelAnalysis : public FunctionPass { 41 42 public: 43 static char ID; // Class identification, replacement for typeinfo 44 CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) { 45 initializeCostModelAnalysisPass( 46 *PassRegistry::getPassRegistry()); 47 } 48 49 /// Returns the expected cost of the instruction. 50 /// Returns -1 if the cost is unknown. 51 /// Note, this method does not cache the cost calculation and it 52 /// can be expensive in some cases. 53 unsigned getInstructionCost(const Instruction *I) const; 54 55 private: 56 void getAnalysisUsage(AnalysisUsage &AU) const override; 57 bool runOnFunction(Function &F) override; 58 void print(raw_ostream &OS, const Module*) const override; 59 60 /// The function that we analyze. 61 Function *F; 62 /// Target information. 63 const TargetTransformInfo *TTI; 64 }; 65 } // End of anonymous namespace 66 67 // Register this pass. 68 char CostModelAnalysis::ID = 0; 69 static const char cm_name[] = "Cost Model Analysis"; 70 INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true) 71 INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true) 72 73 FunctionPass *llvm::createCostModelAnalysisPass() { 74 return new CostModelAnalysis(); 75 } 76 77 void 78 CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 79 AU.setPreservesAll(); 80 } 81 82 bool 83 CostModelAnalysis::runOnFunction(Function &F) { 84 this->F = &F; 85 TTI = getAnalysisIfAvailable<TargetTransformInfo>(); 86 87 return false; 88 } 89 90 static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) { 91 for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) 92 if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) 93 return false; 94 return true; 95 } 96 97 static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { 98 TargetTransformInfo::OperandValueKind OpInfo = 99 TargetTransformInfo::OK_AnyValue; 100 101 // Check for a splat of a constant or for a non uniform vector of constants. 102 if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { 103 OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; 104 if (cast<Constant>(V)->getSplatValue() != nullptr) 105 OpInfo = TargetTransformInfo::OK_UniformConstantValue; 106 } 107 108 return OpInfo; 109 } 110 111 static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, 112 unsigned Level) { 113 // We don't need a shuffle if we just want to have element 0 in position 0 of 114 // the vector. 115 if (!SI && Level == 0 && IsLeft) 116 return true; 117 else if (!SI) 118 return false; 119 120 SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); 121 122 // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether 123 // we look at the left or right side. 124 for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) 125 Mask[i] = val; 126 127 SmallVector<int, 16> ActualMask = SI->getShuffleMask(); 128 if (Mask != ActualMask) 129 return false; 130 131 return true; 132 } 133 134 static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, 135 unsigned Level, unsigned NumLevels) { 136 // Match one level of pairwise operations. 137 // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, 138 // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> 139 // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, 140 // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> 141 // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 142 if (BinOp == nullptr) 143 return false; 144 145 assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); 146 147 unsigned Opcode = BinOp->getOpcode(); 148 Value *L = BinOp->getOperand(0); 149 Value *R = BinOp->getOperand(1); 150 151 ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L); 152 if (!LS && Level) 153 return false; 154 ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R); 155 if (!RS && Level) 156 return false; 157 158 // On level 0 we can omit one shufflevector instruction. 159 if (!Level && !RS && !LS) 160 return false; 161 162 // Shuffle inputs must match. 163 Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; 164 Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; 165 Value *NextLevelOp = nullptr; 166 if (NextLevelOpR && NextLevelOpL) { 167 // If we have two shuffles their operands must match. 168 if (NextLevelOpL != NextLevelOpR) 169 return false; 170 171 NextLevelOp = NextLevelOpL; 172 } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { 173 // On the first level we can omit the shufflevector <0, undef,...>. So the 174 // input to the other shufflevector <1, undef> must match with one of the 175 // inputs to the current binary operation. 176 // Example: 177 // %NextLevelOpL = shufflevector %R, <1, undef ...> 178 // %BinOp = fadd %NextLevelOpL, %R 179 if (NextLevelOpL && NextLevelOpL != R) 180 return false; 181 else if (NextLevelOpR && NextLevelOpR != L) 182 return false; 183 184 NextLevelOp = NextLevelOpL ? R : L; 185 } else 186 return false; 187 188 // Check that the next levels binary operation exists and matches with the 189 // current one. 190 BinaryOperator *NextLevelBinOp = nullptr; 191 if (Level + 1 != NumLevels) { 192 if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp))) 193 return false; 194 else if (NextLevelBinOp->getOpcode() != Opcode) 195 return false; 196 } 197 198 // Shuffle mask for pairwise operation must match. 199 if (matchPairwiseShuffleMask(LS, true, Level)) { 200 if (!matchPairwiseShuffleMask(RS, false, Level)) 201 return false; 202 } else if (matchPairwiseShuffleMask(RS, true, Level)) { 203 if (!matchPairwiseShuffleMask(LS, false, Level)) 204 return false; 205 } else 206 return false; 207 208 if (++Level == NumLevels) 209 return true; 210 211 // Match next level. 212 return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels); 213 } 214 215 static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot, 216 unsigned &Opcode, Type *&Ty) { 217 if (!EnableReduxCost) 218 return false; 219 220 // Need to extract the first element. 221 ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); 222 unsigned Idx = ~0u; 223 if (CI) 224 Idx = CI->getZExtValue(); 225 if (Idx != 0) 226 return false; 227 228 BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); 229 if (!RdxStart) 230 return false; 231 232 Type *VecTy = ReduxRoot->getOperand(0)->getType(); 233 unsigned NumVecElems = VecTy->getVectorNumElements(); 234 if (!isPowerOf2_32(NumVecElems)) 235 return false; 236 237 // We look for a sequence of shuffle,shuffle,add triples like the following 238 // that builds a pairwise reduction tree. 239 // 240 // (X0, X1, X2, X3) 241 // (X0 + X1, X2 + X3, undef, undef) 242 // ((X0 + X1) + (X2 + X3), undef, undef, undef) 243 // 244 // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, 245 // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> 246 // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, 247 // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> 248 // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 249 // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, 250 // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> 251 // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, 252 // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 253 // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 254 // %r = extractelement <4 x float> %bin.rdx8, i32 0 255 if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems))) 256 return false; 257 258 Opcode = RdxStart->getOpcode(); 259 Ty = VecTy; 260 261 return true; 262 } 263 264 static std::pair<Value *, ShuffleVectorInst *> 265 getShuffleAndOtherOprd(BinaryOperator *B) { 266 267 Value *L = B->getOperand(0); 268 Value *R = B->getOperand(1); 269 ShuffleVectorInst *S = nullptr; 270 271 if ((S = dyn_cast<ShuffleVectorInst>(L))) 272 return std::make_pair(R, S); 273 274 S = dyn_cast<ShuffleVectorInst>(R); 275 return std::make_pair(L, S); 276 } 277 278 static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, 279 unsigned &Opcode, Type *&Ty) { 280 if (!EnableReduxCost) 281 return false; 282 283 // Need to extract the first element. 284 ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); 285 unsigned Idx = ~0u; 286 if (CI) 287 Idx = CI->getZExtValue(); 288 if (Idx != 0) 289 return false; 290 291 BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); 292 if (!RdxStart) 293 return false; 294 unsigned RdxOpcode = RdxStart->getOpcode(); 295 296 Type *VecTy = ReduxRoot->getOperand(0)->getType(); 297 unsigned NumVecElems = VecTy->getVectorNumElements(); 298 if (!isPowerOf2_32(NumVecElems)) 299 return false; 300 301 // We look for a sequence of shuffles and adds like the following matching one 302 // fadd, shuffle vector pair at a time. 303 // 304 // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, 305 // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 306 // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf 307 // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, 308 // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 309 // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 310 // %r = extractelement <4 x float> %bin.rdx8, i32 0 311 312 unsigned MaskStart = 1; 313 Value *RdxOp = RdxStart; 314 SmallVector<int, 32> ShuffleMask(NumVecElems, 0); 315 unsigned NumVecElemsRemain = NumVecElems; 316 while (NumVecElemsRemain - 1) { 317 // Check for the right reduction operation. 318 BinaryOperator *BinOp; 319 if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp))) 320 return false; 321 if (BinOp->getOpcode() != RdxOpcode) 322 return false; 323 324 Value *NextRdxOp; 325 ShuffleVectorInst *Shuffle; 326 std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); 327 328 // Check the current reduction operation and the shuffle use the same value. 329 if (Shuffle == nullptr) 330 return false; 331 if (Shuffle->getOperand(0) != NextRdxOp) 332 return false; 333 334 // Check that shuffle masks matches. 335 for (unsigned j = 0; j != MaskStart; ++j) 336 ShuffleMask[j] = MaskStart + j; 337 // Fill the rest of the mask with -1 for undef. 338 std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); 339 340 SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); 341 if (ShuffleMask != Mask) 342 return false; 343 344 RdxOp = NextRdxOp; 345 NumVecElemsRemain /= 2; 346 MaskStart *= 2; 347 } 348 349 Opcode = RdxOpcode; 350 Ty = VecTy; 351 return true; 352 } 353 354 unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { 355 if (!TTI) 356 return -1; 357 358 switch (I->getOpcode()) { 359 case Instruction::GetElementPtr:{ 360 Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); 361 return TTI->getAddressComputationCost(ValTy); 362 } 363 364 case Instruction::Ret: 365 case Instruction::PHI: 366 case Instruction::Br: { 367 return TTI->getCFInstrCost(I->getOpcode()); 368 } 369 case Instruction::Add: 370 case Instruction::FAdd: 371 case Instruction::Sub: 372 case Instruction::FSub: 373 case Instruction::Mul: 374 case Instruction::FMul: 375 case Instruction::UDiv: 376 case Instruction::SDiv: 377 case Instruction::FDiv: 378 case Instruction::URem: 379 case Instruction::SRem: 380 case Instruction::FRem: 381 case Instruction::Shl: 382 case Instruction::LShr: 383 case Instruction::AShr: 384 case Instruction::And: 385 case Instruction::Or: 386 case Instruction::Xor: { 387 TargetTransformInfo::OperandValueKind Op1VK = 388 getOperandInfo(I->getOperand(0)); 389 TargetTransformInfo::OperandValueKind Op2VK = 390 getOperandInfo(I->getOperand(1)); 391 return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, 392 Op2VK); 393 } 394 case Instruction::Select: { 395 const SelectInst *SI = cast<SelectInst>(I); 396 Type *CondTy = SI->getCondition()->getType(); 397 return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); 398 } 399 case Instruction::ICmp: 400 case Instruction::FCmp: { 401 Type *ValTy = I->getOperand(0)->getType(); 402 return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); 403 } 404 case Instruction::Store: { 405 const StoreInst *SI = cast<StoreInst>(I); 406 Type *ValTy = SI->getValueOperand()->getType(); 407 return TTI->getMemoryOpCost(I->getOpcode(), ValTy, 408 SI->getAlignment(), 409 SI->getPointerAddressSpace()); 410 } 411 case Instruction::Load: { 412 const LoadInst *LI = cast<LoadInst>(I); 413 return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), 414 LI->getAlignment(), 415 LI->getPointerAddressSpace()); 416 } 417 case Instruction::ZExt: 418 case Instruction::SExt: 419 case Instruction::FPToUI: 420 case Instruction::FPToSI: 421 case Instruction::FPExt: 422 case Instruction::PtrToInt: 423 case Instruction::IntToPtr: 424 case Instruction::SIToFP: 425 case Instruction::UIToFP: 426 case Instruction::Trunc: 427 case Instruction::FPTrunc: 428 case Instruction::BitCast: 429 case Instruction::AddrSpaceCast: { 430 Type *SrcTy = I->getOperand(0)->getType(); 431 return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); 432 } 433 case Instruction::ExtractElement: { 434 const ExtractElementInst * EEI = cast<ExtractElementInst>(I); 435 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); 436 unsigned Idx = -1; 437 if (CI) 438 Idx = CI->getZExtValue(); 439 440 // Try to match a reduction sequence (series of shufflevector and vector 441 // adds followed by a extractelement). 442 unsigned ReduxOpCode; 443 Type *ReduxType; 444 445 if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) 446 return TTI->getReductionCost(ReduxOpCode, ReduxType, false); 447 else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) 448 return TTI->getReductionCost(ReduxOpCode, ReduxType, true); 449 450 return TTI->getVectorInstrCost(I->getOpcode(), 451 EEI->getOperand(0)->getType(), Idx); 452 } 453 case Instruction::InsertElement: { 454 const InsertElementInst * IE = cast<InsertElementInst>(I); 455 ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); 456 unsigned Idx = -1; 457 if (CI) 458 Idx = CI->getZExtValue(); 459 return TTI->getVectorInstrCost(I->getOpcode(), 460 IE->getType(), Idx); 461 } 462 case Instruction::ShuffleVector: { 463 const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); 464 Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); 465 unsigned NumVecElems = VecTypOp0->getVectorNumElements(); 466 SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); 467 468 if (NumVecElems == Mask.size() && isReverseVectorMask(Mask)) 469 return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, 470 nullptr); 471 return -1; 472 } 473 case Instruction::Call: 474 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 475 SmallVector<Type*, 4> Tys; 476 for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) 477 Tys.push_back(II->getArgOperand(J)->getType()); 478 479 return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), 480 Tys); 481 } 482 return -1; 483 default: 484 // We don't have any information on this instruction. 485 return -1; 486 } 487 } 488 489 void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { 490 if (!F) 491 return; 492 493 for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) { 494 for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) { 495 Instruction *Inst = it; 496 unsigned Cost = getInstructionCost(Inst); 497 if (Cost != (unsigned)-1) 498 OS << "Cost Model: Found an estimated cost of " << Cost; 499 else 500 OS << "Cost Model: Unknown cost"; 501 502 OS << " for instruction: "<< *Inst << "\n"; 503 } 504 } 505 } 506