1 //===- CostModel.cpp ------ Cost Model Analysis ---------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the cost model analysis. It provides a very basic cost 11 // estimation for LLVM-IR. This analysis uses the services of the codegen 12 // to approximate the cost of any IR instruction when lowered to machine 13 // instructions. The cost results are unit-less and the cost number represents 14 // the throughput of the machine assuming that all loads hit the cache, all 15 // branches are predicted, etc. The cost numbers can be added in order to 16 // compare two or more transformation alternatives. 17 // 18 //===----------------------------------------------------------------------===// 19 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/Analysis/Passes.h" 22 #include "llvm/Analysis/TargetTransformInfo.h" 23 #include "llvm/IR/Function.h" 24 #include "llvm/IR/Instructions.h" 25 #include "llvm/IR/IntrinsicInst.h" 26 #include "llvm/IR/Value.h" 27 #include "llvm/Pass.h" 28 #include "llvm/Support/CommandLine.h" 29 #include "llvm/Support/Debug.h" 30 #include "llvm/Support/raw_ostream.h" 31 using namespace llvm; 32 33 #define CM_NAME "cost-model" 34 #define DEBUG_TYPE CM_NAME 35 36 static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false), 37 cl::Hidden, 38 cl::desc("Recognize reduction patterns.")); 39 40 namespace { 41 class CostModelAnalysis : public FunctionPass { 42 43 public: 44 static char ID; // Class identification, replacement for typeinfo 45 CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) { 46 initializeCostModelAnalysisPass( 47 *PassRegistry::getPassRegistry()); 48 } 49 50 /// Returns the expected cost of the instruction. 51 /// Returns -1 if the cost is unknown. 52 /// Note, this method does not cache the cost calculation and it 53 /// can be expensive in some cases. 54 unsigned getInstructionCost(const Instruction *I) const; 55 56 private: 57 void getAnalysisUsage(AnalysisUsage &AU) const override; 58 bool runOnFunction(Function &F) override; 59 void print(raw_ostream &OS, const Module*) const override; 60 61 /// The function that we analyze. 62 Function *F; 63 /// Target information. 64 const TargetTransformInfo *TTI; 65 }; 66 } // End of anonymous namespace 67 68 // Register this pass. 69 char CostModelAnalysis::ID = 0; 70 static const char cm_name[] = "Cost Model Analysis"; 71 INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true) 72 INITIALIZE_PASS_END (CostModelAnalysis, CM_NAME, cm_name, false, true) 73 74 FunctionPass *llvm::createCostModelAnalysisPass() { 75 return new CostModelAnalysis(); 76 } 77 78 void 79 CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { 80 AU.setPreservesAll(); 81 } 82 83 bool 84 CostModelAnalysis::runOnFunction(Function &F) { 85 this->F = &F; 86 auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>(); 87 TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr; 88 89 return false; 90 } 91 92 static bool isReverseVectorMask(SmallVectorImpl<int> &Mask) { 93 for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) 94 if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) 95 return false; 96 return true; 97 } 98 99 static bool isAlternateVectorMask(SmallVectorImpl<int> &Mask) { 100 bool isAlternate = true; 101 unsigned MaskSize = Mask.size(); 102 103 // Example: shufflevector A, B, <0,5,2,7> 104 for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { 105 if (Mask[i] < 0) 106 continue; 107 isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i); 108 } 109 110 if (isAlternate) 111 return true; 112 113 isAlternate = true; 114 // Example: shufflevector A, B, <4,1,6,3> 115 for (unsigned i = 0; i < MaskSize && isAlternate; ++i) { 116 if (Mask[i] < 0) 117 continue; 118 isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i); 119 } 120 121 return isAlternate; 122 } 123 124 static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { 125 TargetTransformInfo::OperandValueKind OpInfo = 126 TargetTransformInfo::OK_AnyValue; 127 128 // Check for a splat of a constant or for a non uniform vector of constants. 129 if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) { 130 OpInfo = TargetTransformInfo::OK_NonUniformConstantValue; 131 if (cast<Constant>(V)->getSplatValue() != nullptr) 132 OpInfo = TargetTransformInfo::OK_UniformConstantValue; 133 } 134 135 return OpInfo; 136 } 137 138 static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, 139 unsigned Level) { 140 // We don't need a shuffle if we just want to have element 0 in position 0 of 141 // the vector. 142 if (!SI && Level == 0 && IsLeft) 143 return true; 144 else if (!SI) 145 return false; 146 147 SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1); 148 149 // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether 150 // we look at the left or right side. 151 for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) 152 Mask[i] = val; 153 154 SmallVector<int, 16> ActualMask = SI->getShuffleMask(); 155 if (Mask != ActualMask) 156 return false; 157 158 return true; 159 } 160 161 static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, 162 unsigned Level, unsigned NumLevels) { 163 // Match one level of pairwise operations. 164 // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, 165 // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> 166 // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, 167 // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> 168 // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 169 if (BinOp == nullptr) 170 return false; 171 172 assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); 173 174 unsigned Opcode = BinOp->getOpcode(); 175 Value *L = BinOp->getOperand(0); 176 Value *R = BinOp->getOperand(1); 177 178 ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(L); 179 if (!LS && Level) 180 return false; 181 ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(R); 182 if (!RS && Level) 183 return false; 184 185 // On level 0 we can omit one shufflevector instruction. 186 if (!Level && !RS && !LS) 187 return false; 188 189 // Shuffle inputs must match. 190 Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr; 191 Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr; 192 Value *NextLevelOp = nullptr; 193 if (NextLevelOpR && NextLevelOpL) { 194 // If we have two shuffles their operands must match. 195 if (NextLevelOpL != NextLevelOpR) 196 return false; 197 198 NextLevelOp = NextLevelOpL; 199 } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { 200 // On the first level we can omit the shufflevector <0, undef,...>. So the 201 // input to the other shufflevector <1, undef> must match with one of the 202 // inputs to the current binary operation. 203 // Example: 204 // %NextLevelOpL = shufflevector %R, <1, undef ...> 205 // %BinOp = fadd %NextLevelOpL, %R 206 if (NextLevelOpL && NextLevelOpL != R) 207 return false; 208 else if (NextLevelOpR && NextLevelOpR != L) 209 return false; 210 211 NextLevelOp = NextLevelOpL ? R : L; 212 } else 213 return false; 214 215 // Check that the next levels binary operation exists and matches with the 216 // current one. 217 BinaryOperator *NextLevelBinOp = nullptr; 218 if (Level + 1 != NumLevels) { 219 if (!(NextLevelBinOp = dyn_cast<BinaryOperator>(NextLevelOp))) 220 return false; 221 else if (NextLevelBinOp->getOpcode() != Opcode) 222 return false; 223 } 224 225 // Shuffle mask for pairwise operation must match. 226 if (matchPairwiseShuffleMask(LS, true, Level)) { 227 if (!matchPairwiseShuffleMask(RS, false, Level)) 228 return false; 229 } else if (matchPairwiseShuffleMask(RS, true, Level)) { 230 if (!matchPairwiseShuffleMask(LS, false, Level)) 231 return false; 232 } else 233 return false; 234 235 if (++Level == NumLevels) 236 return true; 237 238 // Match next level. 239 return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels); 240 } 241 242 static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot, 243 unsigned &Opcode, Type *&Ty) { 244 if (!EnableReduxCost) 245 return false; 246 247 // Need to extract the first element. 248 ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); 249 unsigned Idx = ~0u; 250 if (CI) 251 Idx = CI->getZExtValue(); 252 if (Idx != 0) 253 return false; 254 255 BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); 256 if (!RdxStart) 257 return false; 258 259 Type *VecTy = ReduxRoot->getOperand(0)->getType(); 260 unsigned NumVecElems = VecTy->getVectorNumElements(); 261 if (!isPowerOf2_32(NumVecElems)) 262 return false; 263 264 // We look for a sequence of shuffle,shuffle,add triples like the following 265 // that builds a pairwise reduction tree. 266 // 267 // (X0, X1, X2, X3) 268 // (X0 + X1, X2 + X3, undef, undef) 269 // ((X0 + X1) + (X2 + X3), undef, undef, undef) 270 // 271 // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, 272 // <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef> 273 // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, 274 // <4 x i32> <i32 1, i32 3, i32 undef, i32 undef> 275 // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 276 // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, 277 // <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef> 278 // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, 279 // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 280 // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 281 // %r = extractelement <4 x float> %bin.rdx8, i32 0 282 if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems))) 283 return false; 284 285 Opcode = RdxStart->getOpcode(); 286 Ty = VecTy; 287 288 return true; 289 } 290 291 static std::pair<Value *, ShuffleVectorInst *> 292 getShuffleAndOtherOprd(BinaryOperator *B) { 293 294 Value *L = B->getOperand(0); 295 Value *R = B->getOperand(1); 296 ShuffleVectorInst *S = nullptr; 297 298 if ((S = dyn_cast<ShuffleVectorInst>(L))) 299 return std::make_pair(R, S); 300 301 S = dyn_cast<ShuffleVectorInst>(R); 302 return std::make_pair(L, S); 303 } 304 305 static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, 306 unsigned &Opcode, Type *&Ty) { 307 if (!EnableReduxCost) 308 return false; 309 310 // Need to extract the first element. 311 ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1)); 312 unsigned Idx = ~0u; 313 if (CI) 314 Idx = CI->getZExtValue(); 315 if (Idx != 0) 316 return false; 317 318 BinaryOperator *RdxStart = dyn_cast<BinaryOperator>(ReduxRoot->getOperand(0)); 319 if (!RdxStart) 320 return false; 321 unsigned RdxOpcode = RdxStart->getOpcode(); 322 323 Type *VecTy = ReduxRoot->getOperand(0)->getType(); 324 unsigned NumVecElems = VecTy->getVectorNumElements(); 325 if (!isPowerOf2_32(NumVecElems)) 326 return false; 327 328 // We look for a sequence of shuffles and adds like the following matching one 329 // fadd, shuffle vector pair at a time. 330 // 331 // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, 332 // <4 x i32> <i32 2, i32 3, i32 undef, i32 undef> 333 // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf 334 // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, 335 // <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef> 336 // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 337 // %r = extractelement <4 x float> %bin.rdx8, i32 0 338 339 unsigned MaskStart = 1; 340 Value *RdxOp = RdxStart; 341 SmallVector<int, 32> ShuffleMask(NumVecElems, 0); 342 unsigned NumVecElemsRemain = NumVecElems; 343 while (NumVecElemsRemain - 1) { 344 // Check for the right reduction operation. 345 BinaryOperator *BinOp; 346 if (!(BinOp = dyn_cast<BinaryOperator>(RdxOp))) 347 return false; 348 if (BinOp->getOpcode() != RdxOpcode) 349 return false; 350 351 Value *NextRdxOp; 352 ShuffleVectorInst *Shuffle; 353 std::tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); 354 355 // Check the current reduction operation and the shuffle use the same value. 356 if (Shuffle == nullptr) 357 return false; 358 if (Shuffle->getOperand(0) != NextRdxOp) 359 return false; 360 361 // Check that shuffle masks matches. 362 for (unsigned j = 0; j != MaskStart; ++j) 363 ShuffleMask[j] = MaskStart + j; 364 // Fill the rest of the mask with -1 for undef. 365 std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); 366 367 SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); 368 if (ShuffleMask != Mask) 369 return false; 370 371 RdxOp = NextRdxOp; 372 NumVecElemsRemain /= 2; 373 MaskStart *= 2; 374 } 375 376 Opcode = RdxOpcode; 377 Ty = VecTy; 378 return true; 379 } 380 381 unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { 382 if (!TTI) 383 return -1; 384 385 switch (I->getOpcode()) { 386 case Instruction::GetElementPtr:{ 387 Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); 388 return TTI->getAddressComputationCost(ValTy); 389 } 390 391 case Instruction::Ret: 392 case Instruction::PHI: 393 case Instruction::Br: { 394 return TTI->getCFInstrCost(I->getOpcode()); 395 } 396 case Instruction::Add: 397 case Instruction::FAdd: 398 case Instruction::Sub: 399 case Instruction::FSub: 400 case Instruction::Mul: 401 case Instruction::FMul: 402 case Instruction::UDiv: 403 case Instruction::SDiv: 404 case Instruction::FDiv: 405 case Instruction::URem: 406 case Instruction::SRem: 407 case Instruction::FRem: 408 case Instruction::Shl: 409 case Instruction::LShr: 410 case Instruction::AShr: 411 case Instruction::And: 412 case Instruction::Or: 413 case Instruction::Xor: { 414 TargetTransformInfo::OperandValueKind Op1VK = 415 getOperandInfo(I->getOperand(0)); 416 TargetTransformInfo::OperandValueKind Op2VK = 417 getOperandInfo(I->getOperand(1)); 418 return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, 419 Op2VK); 420 } 421 case Instruction::Select: { 422 const SelectInst *SI = cast<SelectInst>(I); 423 Type *CondTy = SI->getCondition()->getType(); 424 return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); 425 } 426 case Instruction::ICmp: 427 case Instruction::FCmp: { 428 Type *ValTy = I->getOperand(0)->getType(); 429 return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); 430 } 431 case Instruction::Store: { 432 const StoreInst *SI = cast<StoreInst>(I); 433 Type *ValTy = SI->getValueOperand()->getType(); 434 return TTI->getMemoryOpCost(I->getOpcode(), ValTy, 435 SI->getAlignment(), 436 SI->getPointerAddressSpace()); 437 } 438 case Instruction::Load: { 439 const LoadInst *LI = cast<LoadInst>(I); 440 return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), 441 LI->getAlignment(), 442 LI->getPointerAddressSpace()); 443 } 444 case Instruction::ZExt: 445 case Instruction::SExt: 446 case Instruction::FPToUI: 447 case Instruction::FPToSI: 448 case Instruction::FPExt: 449 case Instruction::PtrToInt: 450 case Instruction::IntToPtr: 451 case Instruction::SIToFP: 452 case Instruction::UIToFP: 453 case Instruction::Trunc: 454 case Instruction::FPTrunc: 455 case Instruction::BitCast: 456 case Instruction::AddrSpaceCast: { 457 Type *SrcTy = I->getOperand(0)->getType(); 458 return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); 459 } 460 case Instruction::ExtractElement: { 461 const ExtractElementInst * EEI = cast<ExtractElementInst>(I); 462 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1)); 463 unsigned Idx = -1; 464 if (CI) 465 Idx = CI->getZExtValue(); 466 467 // Try to match a reduction sequence (series of shufflevector and vector 468 // adds followed by a extractelement). 469 unsigned ReduxOpCode; 470 Type *ReduxType; 471 472 if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) 473 return TTI->getReductionCost(ReduxOpCode, ReduxType, false); 474 else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) 475 return TTI->getReductionCost(ReduxOpCode, ReduxType, true); 476 477 return TTI->getVectorInstrCost(I->getOpcode(), 478 EEI->getOperand(0)->getType(), Idx); 479 } 480 case Instruction::InsertElement: { 481 const InsertElementInst * IE = cast<InsertElementInst>(I); 482 ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2)); 483 unsigned Idx = -1; 484 if (CI) 485 Idx = CI->getZExtValue(); 486 return TTI->getVectorInstrCost(I->getOpcode(), 487 IE->getType(), Idx); 488 } 489 case Instruction::ShuffleVector: { 490 const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I); 491 Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); 492 unsigned NumVecElems = VecTypOp0->getVectorNumElements(); 493 SmallVector<int, 16> Mask = Shuffle->getShuffleMask(); 494 495 if (NumVecElems == Mask.size()) { 496 if (isReverseVectorMask(Mask)) 497 return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 498 0, nullptr); 499 if (isAlternateVectorMask(Mask)) 500 return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate, 501 VecTypOp0, 0, nullptr); 502 } 503 504 return -1; 505 } 506 case Instruction::Call: 507 if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 508 SmallVector<Type*, 4> Tys; 509 for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) 510 Tys.push_back(II->getArgOperand(J)->getType()); 511 512 return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), 513 Tys); 514 } 515 return -1; 516 default: 517 // We don't have any information on this instruction. 518 return -1; 519 } 520 } 521 522 void CostModelAnalysis::print(raw_ostream &OS, const Module*) const { 523 if (!F) 524 return; 525 526 for (Function::iterator B = F->begin(), BE = F->end(); B != BE; ++B) { 527 for (BasicBlock::iterator it = B->begin(), e = B->end(); it != e; ++it) { 528 Instruction *Inst = it; 529 unsigned Cost = getInstructionCost(Inst); 530 if (Cost != (unsigned)-1) 531 OS << "Cost Model: Found an estimated cost of " << Cost; 532 else 533 OS << "Cost Model: Unknown cost"; 534 535 OS << " for instruction: "<< *Inst << "\n"; 536 } 537 } 538 } 539