1 //===- CostModel.cpp ------ Cost Model Analysis ---------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines the cost model analysis. It provides a very basic cost
11 // estimation for LLVM-IR. This analysis uses the services of the codegen
12 // to approximate the cost of any IR instruction when lowered to machine
13 // instructions. The cost results are unit-less and the cost number represents
14 // the throughput of the machine assuming that all loads hit the cache, all
15 // branches are predicted, etc. The cost numbers can be added in order to
16 // compare two or more transformation alternatives.
17 //
18 //===----------------------------------------------------------------------===//
19 
20 #include "llvm/ADT/STLExtras.h"
21 #include "llvm/Analysis/Passes.h"
22 #include "llvm/Analysis/TargetTransformInfo.h"
23 #include "llvm/Analysis/VectorUtils.h"
24 #include "llvm/IR/Function.h"
25 #include "llvm/IR/Instructions.h"
26 #include "llvm/IR/IntrinsicInst.h"
27 #include "llvm/IR/PatternMatch.h"
28 #include "llvm/IR/Value.h"
29 #include "llvm/Pass.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Support/raw_ostream.h"
33 using namespace llvm;
34 using namespace PatternMatch;
35 
36 #define CM_NAME "cost-model"
37 #define DEBUG_TYPE CM_NAME
38 
39 static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
40                                      cl::Hidden,
41                                      cl::desc("Recognize reduction patterns."));
42 
43 namespace {
44   class CostModelAnalysis : public FunctionPass {
45 
46   public:
47     static char ID; // Class identification, replacement for typeinfo
48     CostModelAnalysis() : FunctionPass(ID), F(nullptr), TTI(nullptr) {
49       initializeCostModelAnalysisPass(
50         *PassRegistry::getPassRegistry());
51     }
52 
53     /// Returns the expected cost of the instruction.
54     /// Returns -1 if the cost is unknown.
55     /// Note, this method does not cache the cost calculation and it
56     /// can be expensive in some cases.
57     unsigned getInstructionCost(const Instruction *I) const;
58 
59   private:
60     void getAnalysisUsage(AnalysisUsage &AU) const override;
61     bool runOnFunction(Function &F) override;
62     void print(raw_ostream &OS, const Module*) const override;
63 
64     /// The function that we analyze.
65     Function *F;
66     /// Target information.
67     const TargetTransformInfo *TTI;
68   };
69 }  // End of anonymous namespace
70 
71 // Register this pass.
72 char CostModelAnalysis::ID = 0;
73 static const char cm_name[] = "Cost Model Analysis";
74 INITIALIZE_PASS_BEGIN(CostModelAnalysis, CM_NAME, cm_name, false, true)
75 INITIALIZE_PASS_END  (CostModelAnalysis, CM_NAME, cm_name, false, true)
76 
77 FunctionPass *llvm::createCostModelAnalysisPass() {
78   return new CostModelAnalysis();
79 }
80 
81 void
82 CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
83   AU.setPreservesAll();
84 }
85 
86 bool
87 CostModelAnalysis::runOnFunction(Function &F) {
88  this->F = &F;
89  auto *TTIWP = getAnalysisIfAvailable<TargetTransformInfoWrapperPass>();
90  TTI = TTIWP ? &TTIWP->getTTI(F) : nullptr;
91 
92  return false;
93 }
94 
95 static bool isReverseVectorMask(ArrayRef<int> Mask) {
96   for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i)
97     if (Mask[i] >= 0 && Mask[i] != (int)(MaskSize - 1 - i))
98       return false;
99   return true;
100 }
101 
102 static bool isSingleSourceVectorMask(ArrayRef<int> Mask) {
103   bool Vec0 = false;
104   bool Vec1 = false;
105   for (unsigned i = 0, NumVecElts = Mask.size(); i < NumVecElts; ++i) {
106     if (Mask[i] >= 0) {
107       if ((unsigned)Mask[i] >= NumVecElts)
108         Vec1 = true;
109       else
110         Vec0 = true;
111     }
112   }
113   return !(Vec0 && Vec1);
114 }
115 
116 static bool isZeroEltBroadcastVectorMask(ArrayRef<int> Mask) {
117   for (unsigned i = 0; i < Mask.size(); ++i)
118     if (Mask[i] > 0)
119       return false;
120   return true;
121 }
122 
123 static bool isAlternateVectorMask(ArrayRef<int> Mask) {
124   bool isAlternate = true;
125   unsigned MaskSize = Mask.size();
126 
127   // Example: shufflevector A, B, <0,5,2,7>
128   for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
129     if (Mask[i] < 0)
130       continue;
131     isAlternate = Mask[i] == (int)((i & 1) ? MaskSize + i : i);
132   }
133 
134   if (isAlternate)
135     return true;
136 
137   isAlternate = true;
138   // Example: shufflevector A, B, <4,1,6,3>
139   for (unsigned i = 0; i < MaskSize && isAlternate; ++i) {
140     if (Mask[i] < 0)
141       continue;
142     isAlternate = Mask[i] == (int)((i & 1) ? i : MaskSize + i);
143   }
144 
145   return isAlternate;
146 }
147 
148 static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) {
149   TargetTransformInfo::OperandValueKind OpInfo =
150       TargetTransformInfo::OK_AnyValue;
151 
152   // Check for a splat of a constant or for a non uniform vector of constants.
153   if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
154     OpInfo = TargetTransformInfo::OK_NonUniformConstantValue;
155     if (cast<Constant>(V)->getSplatValue() != nullptr)
156       OpInfo = TargetTransformInfo::OK_UniformConstantValue;
157   }
158 
159   // Check for a splat of a uniform value. This is not loop aware, so return
160   // true only for the obviously uniform cases (argument, globalvalue)
161   const Value *Splat = getSplatValue(V);
162   if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
163     OpInfo = TargetTransformInfo::OK_UniformValue;
164 
165   return OpInfo;
166 }
167 
168 static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
169                                      unsigned Level) {
170   // We don't need a shuffle if we just want to have element 0 in position 0 of
171   // the vector.
172   if (!SI && Level == 0 && IsLeft)
173     return true;
174   else if (!SI)
175     return false;
176 
177   SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
178 
179   // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
180   // we look at the left or right side.
181   for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
182     Mask[i] = val;
183 
184   SmallVector<int, 16> ActualMask = SI->getShuffleMask();
185   return Mask == ActualMask;
186 }
187 
188 namespace {
189 /// Kind of the reduction data.
190 enum ReductionKind {
191   RK_None,           /// Not a reduction.
192   RK_Arithmetic,     /// Binary reduction data.
193   RK_MinMax,         /// Min/max reduction data.
194   RK_UnsignedMinMax, /// Unsigned min/max reduction data.
195 };
196 /// Contains opcode + LHS/RHS parts of the reduction operations.
197 struct ReductionData {
198   ReductionData() = delete;
199   ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
200       : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
201     assert(Kind != RK_None && "expected binary or min/max reduction only.");
202   }
203   unsigned Opcode = 0;
204   Value *LHS = nullptr;
205   Value *RHS = nullptr;
206   ReductionKind Kind = RK_None;
207   bool hasSameData(ReductionData &RD) const {
208     return Kind == RD.Kind && Opcode == RD.Opcode;
209   }
210 };
211 } // namespace
212 
213 static Optional<ReductionData> getReductionData(Instruction *I) {
214   Value *L, *R;
215   if (m_BinOp(m_Value(L), m_Value(R)).match(I))
216     return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
217   if (auto *SI = dyn_cast<SelectInst>(I)) {
218     if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
219         m_SMax(m_Value(L), m_Value(R)).match(SI) ||
220         m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
221         m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
222         m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
223         m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
224       auto *CI = cast<CmpInst>(SI->getCondition());
225       return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
226     }
227     if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
228         m_UMax(m_Value(L), m_Value(R)).match(SI)) {
229       auto *CI = cast<CmpInst>(SI->getCondition());
230       return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
231     }
232   }
233   return llvm::None;
234 }
235 
236 static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
237                                                    unsigned Level,
238                                                    unsigned NumLevels) {
239   // Match one level of pairwise operations.
240   // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
241   //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
242   // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
243   //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
244   // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
245   if (!I)
246     return RK_None;
247 
248   assert(I->getType()->isVectorTy() && "Expecting a vector type");
249 
250   Optional<ReductionData> RD = getReductionData(I);
251   if (!RD)
252     return RK_None;
253 
254   ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
255   if (!LS && Level)
256     return RK_None;
257   ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
258   if (!RS && Level)
259     return RK_None;
260 
261   // On level 0 we can omit one shufflevector instruction.
262   if (!Level && !RS && !LS)
263     return RK_None;
264 
265   // Shuffle inputs must match.
266   Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
267   Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
268   Value *NextLevelOp = nullptr;
269   if (NextLevelOpR && NextLevelOpL) {
270     // If we have two shuffles their operands must match.
271     if (NextLevelOpL != NextLevelOpR)
272       return RK_None;
273 
274     NextLevelOp = NextLevelOpL;
275   } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
276     // On the first level we can omit the shufflevector <0, undef,...>. So the
277     // input to the other shufflevector <1, undef> must match with one of the
278     // inputs to the current binary operation.
279     // Example:
280     //  %NextLevelOpL = shufflevector %R, <1, undef ...>
281     //  %BinOp        = fadd          %NextLevelOpL, %R
282     if (NextLevelOpL && NextLevelOpL != RD->RHS)
283       return RK_None;
284     else if (NextLevelOpR && NextLevelOpR != RD->LHS)
285       return RK_None;
286 
287     NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
288   } else {
289     return RK_None;
290   }
291 
292   // Check that the next levels binary operation exists and matches with the
293   // current one.
294   if (Level + 1 != NumLevels) {
295     Optional<ReductionData> NextLevelRD =
296         getReductionData(cast<Instruction>(NextLevelOp));
297     if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
298       return RK_None;
299   }
300 
301   // Shuffle mask for pairwise operation must match.
302   if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
303     if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
304       return RK_None;
305   } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
306     if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
307       return RK_None;
308   } else {
309     return RK_None;
310   }
311 
312   if (++Level == NumLevels)
313     return RD->Kind;
314 
315   // Match next level.
316   return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
317                                        NumLevels);
318 }
319 
320 static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
321                                             unsigned &Opcode, Type *&Ty) {
322   if (!EnableReduxCost)
323     return RK_None;
324 
325   // Need to extract the first element.
326   ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
327   unsigned Idx = ~0u;
328   if (CI)
329     Idx = CI->getZExtValue();
330   if (Idx != 0)
331     return RK_None;
332 
333   auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
334   if (!RdxStart)
335     return RK_None;
336   Optional<ReductionData> RD = getReductionData(RdxStart);
337   if (!RD)
338     return RK_None;
339 
340   Type *VecTy = RdxStart->getType();
341   unsigned NumVecElems = VecTy->getVectorNumElements();
342   if (!isPowerOf2_32(NumVecElems))
343     return RK_None;
344 
345   // We look for a sequence of shuffle,shuffle,add triples like the following
346   // that builds a pairwise reduction tree.
347   //
348   //  (X0, X1, X2, X3)
349   //   (X0 + X1, X2 + X3, undef, undef)
350   //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
351   //
352   // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
353   //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
354   // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
355   //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
356   // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
357   // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
358   //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
359   // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
360   //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
361   // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
362   // %r = extractelement <4 x float> %bin.rdx8, i32 0
363   if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
364       RK_None)
365     return RK_None;
366 
367   Opcode = RD->Opcode;
368   Ty = VecTy;
369 
370   return RD->Kind;
371 }
372 
373 static std::pair<Value *, ShuffleVectorInst *>
374 getShuffleAndOtherOprd(Value *L, Value *R) {
375   ShuffleVectorInst *S = nullptr;
376 
377   if ((S = dyn_cast<ShuffleVectorInst>(L)))
378     return std::make_pair(R, S);
379 
380   S = dyn_cast<ShuffleVectorInst>(R);
381   return std::make_pair(L, S);
382 }
383 
384 static ReductionKind
385 matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
386                               unsigned &Opcode, Type *&Ty) {
387   if (!EnableReduxCost)
388     return RK_None;
389 
390   // Need to extract the first element.
391   ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
392   unsigned Idx = ~0u;
393   if (CI)
394     Idx = CI->getZExtValue();
395   if (Idx != 0)
396     return RK_None;
397 
398   auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
399   if (!RdxStart)
400     return RK_None;
401   Optional<ReductionData> RD = getReductionData(RdxStart);
402   if (!RD)
403     return RK_None;
404 
405   Type *VecTy = ReduxRoot->getOperand(0)->getType();
406   unsigned NumVecElems = VecTy->getVectorNumElements();
407   if (!isPowerOf2_32(NumVecElems))
408     return RK_None;
409 
410   // We look for a sequence of shuffles and adds like the following matching one
411   // fadd, shuffle vector pair at a time.
412   //
413   // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
414   //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
415   // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
416   // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
417   //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
418   // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
419   // %r = extractelement <4 x float> %bin.rdx8, i32 0
420 
421   unsigned MaskStart = 1;
422   Instruction *RdxOp = RdxStart;
423   SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
424   unsigned NumVecElemsRemain = NumVecElems;
425   while (NumVecElemsRemain - 1) {
426     // Check for the right reduction operation.
427     if (!RdxOp)
428       return RK_None;
429     Optional<ReductionData> RDLevel = getReductionData(RdxOp);
430     if (!RDLevel || !RDLevel->hasSameData(*RD))
431       return RK_None;
432 
433     Value *NextRdxOp;
434     ShuffleVectorInst *Shuffle;
435     std::tie(NextRdxOp, Shuffle) =
436         getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);
437 
438     // Check the current reduction operation and the shuffle use the same value.
439     if (Shuffle == nullptr)
440       return RK_None;
441     if (Shuffle->getOperand(0) != NextRdxOp)
442       return RK_None;
443 
444     // Check that shuffle masks matches.
445     for (unsigned j = 0; j != MaskStart; ++j)
446       ShuffleMask[j] = MaskStart + j;
447     // Fill the rest of the mask with -1 for undef.
448     std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);
449 
450     SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
451     if (ShuffleMask != Mask)
452       return RK_None;
453 
454     RdxOp = dyn_cast<Instruction>(NextRdxOp);
455     NumVecElemsRemain /= 2;
456     MaskStart *= 2;
457   }
458 
459   Opcode = RD->Opcode;
460   Ty = VecTy;
461   return RD->Kind;
462 }
463 
464 unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const {
465   if (!TTI)
466     return -1;
467 
468   switch (I->getOpcode()) {
469   case Instruction::GetElementPtr:
470     return TTI->getUserCost(I);
471 
472   case Instruction::Ret:
473   case Instruction::PHI:
474   case Instruction::Br: {
475     return TTI->getCFInstrCost(I->getOpcode());
476   }
477   case Instruction::Add:
478   case Instruction::FAdd:
479   case Instruction::Sub:
480   case Instruction::FSub:
481   case Instruction::Mul:
482   case Instruction::FMul:
483   case Instruction::UDiv:
484   case Instruction::SDiv:
485   case Instruction::FDiv:
486   case Instruction::URem:
487   case Instruction::SRem:
488   case Instruction::FRem:
489   case Instruction::Shl:
490   case Instruction::LShr:
491   case Instruction::AShr:
492   case Instruction::And:
493   case Instruction::Or:
494   case Instruction::Xor: {
495     TargetTransformInfo::OperandValueKind Op1VK =
496       getOperandInfo(I->getOperand(0));
497     TargetTransformInfo::OperandValueKind Op2VK =
498       getOperandInfo(I->getOperand(1));
499     SmallVector<const Value*, 2> Operands(I->operand_values());
500     return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK,
501                                        Op2VK, TargetTransformInfo::OP_None,
502                                        TargetTransformInfo::OP_None,
503                                        Operands);
504   }
505   case Instruction::Select: {
506     const SelectInst *SI = cast<SelectInst>(I);
507     Type *CondTy = SI->getCondition()->getType();
508     return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
509   }
510   case Instruction::ICmp:
511   case Instruction::FCmp: {
512     Type *ValTy = I->getOperand(0)->getType();
513     return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
514   }
515   case Instruction::Store: {
516     const StoreInst *SI = cast<StoreInst>(I);
517     Type *ValTy = SI->getValueOperand()->getType();
518     return TTI->getMemoryOpCost(I->getOpcode(), ValTy,
519                                 SI->getAlignment(),
520                                 SI->getPointerAddressSpace(), I);
521   }
522   case Instruction::Load: {
523     const LoadInst *LI = cast<LoadInst>(I);
524     return TTI->getMemoryOpCost(I->getOpcode(), I->getType(),
525                                 LI->getAlignment(),
526                                 LI->getPointerAddressSpace(), I);
527   }
528   case Instruction::ZExt:
529   case Instruction::SExt:
530   case Instruction::FPToUI:
531   case Instruction::FPToSI:
532   case Instruction::FPExt:
533   case Instruction::PtrToInt:
534   case Instruction::IntToPtr:
535   case Instruction::SIToFP:
536   case Instruction::UIToFP:
537   case Instruction::Trunc:
538   case Instruction::FPTrunc:
539   case Instruction::BitCast:
540   case Instruction::AddrSpaceCast: {
541     Type *SrcTy = I->getOperand(0)->getType();
542     return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
543   }
544   case Instruction::ExtractElement: {
545     const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
546     ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
547     unsigned Idx = -1;
548     if (CI)
549       Idx = CI->getZExtValue();
550 
551     // Try to match a reduction sequence (series of shufflevector and vector
552     // adds followed by a extractelement).
553     unsigned ReduxOpCode;
554     Type *ReduxType;
555 
556     switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
557     case RK_Arithmetic:
558       return TTI->getArithmeticReductionCost(ReduxOpCode, ReduxType,
559                                              /*IsPairwiseForm=*/false);
560     case RK_MinMax:
561       return TTI->getMinMaxReductionCost(
562           ReduxType, CmpInst::makeCmpResultType(ReduxType),
563           /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
564     case RK_UnsignedMinMax:
565       return TTI->getMinMaxReductionCost(
566           ReduxType, CmpInst::makeCmpResultType(ReduxType),
567           /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
568     case RK_None:
569       break;
570     }
571 
572     switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
573     case RK_Arithmetic:
574       return TTI->getArithmeticReductionCost(ReduxOpCode, ReduxType,
575                                              /*IsPairwiseForm=*/true);
576     case RK_MinMax:
577       return TTI->getMinMaxReductionCost(
578           ReduxType, CmpInst::makeCmpResultType(ReduxType),
579           /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
580     case RK_UnsignedMinMax:
581       return TTI->getMinMaxReductionCost(
582           ReduxType, CmpInst::makeCmpResultType(ReduxType),
583           /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
584     case RK_None:
585       break;
586     }
587 
588     return TTI->getVectorInstrCost(I->getOpcode(),
589                                    EEI->getOperand(0)->getType(), Idx);
590   }
591   case Instruction::InsertElement: {
592     const InsertElementInst * IE = cast<InsertElementInst>(I);
593     ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
594     unsigned Idx = -1;
595     if (CI)
596       Idx = CI->getZExtValue();
597     return TTI->getVectorInstrCost(I->getOpcode(),
598                                    IE->getType(), Idx);
599   }
600   case Instruction::ShuffleVector: {
601     const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
602     Type *VecTypOp0 = Shuffle->getOperand(0)->getType();
603     unsigned NumVecElems = VecTypOp0->getVectorNumElements();
604     SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
605 
606     if (NumVecElems == Mask.size()) {
607       if (isReverseVectorMask(Mask))
608         return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0,
609                                    0, nullptr);
610       if (isAlternateVectorMask(Mask))
611         return TTI->getShuffleCost(TargetTransformInfo::SK_Alternate,
612                                    VecTypOp0, 0, nullptr);
613 
614       if (isZeroEltBroadcastVectorMask(Mask))
615         return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast,
616                                    VecTypOp0, 0, nullptr);
617 
618       if (isSingleSourceVectorMask(Mask))
619         return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteSingleSrc,
620                                    VecTypOp0, 0, nullptr);
621 
622       return TTI->getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc,
623                                  VecTypOp0, 0, nullptr);
624     }
625 
626     return -1;
627   }
628   case Instruction::Call:
629     if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
630       SmallVector<Value *, 4> Args(II->arg_operands());
631 
632       FastMathFlags FMF;
633       if (auto *FPMO = dyn_cast<FPMathOperator>(II))
634         FMF = FPMO->getFastMathFlags();
635 
636       return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
637                                         Args, FMF);
638     }
639     return -1;
640   default:
641     // We don't have any information on this instruction.
642     return -1;
643   }
644 }
645 
646 void CostModelAnalysis::print(raw_ostream &OS, const Module*) const {
647   if (!F)
648     return;
649 
650   for (BasicBlock &B : *F) {
651     for (Instruction &Inst : B) {
652       unsigned Cost = getInstructionCost(&Inst);
653       if (Cost != (unsigned)-1)
654         OS << "Cost Model: Found an estimated cost of " << Cost;
655       else
656         OS << "Cost Model: Unknown cost";
657 
658       OS << " for instruction: " << Inst << "\n";
659     }
660   }
661 }
662