1 //===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Analysis/TargetTransformInfo.h"
10 #include "llvm/Analysis/TargetTransformInfoImpl.h"
11 #include "llvm/IR/CallSite.h"
12 #include "llvm/IR/DataLayout.h"
13 #include "llvm/IR/Instruction.h"
14 #include "llvm/IR/Instructions.h"
15 #include "llvm/IR/IntrinsicInst.h"
16 #include "llvm/IR/Module.h"
17 #include "llvm/IR/Operator.h"
18 #include "llvm/IR/PatternMatch.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/ErrorHandling.h"
21 #include <utility>
22 
23 using namespace llvm;
24 using namespace PatternMatch;
25 
26 #define DEBUG_TYPE "tti"
27 
// Command-line switch (hidden): when set, the cost model tries to recognize
// horizontal reduction patterns and cost them as reductions.
static cl::opt<bool> EnableReduxCost("costmodel-reduxcost", cl::init(false),
                                     cl::Hidden,
                                     cl::desc("Recognize reduction patterns."));
31 
namespace {
/// No-op implementation of the TTI interface using the utility base
/// classes.
///
/// This is used when no target specific information is available.
struct NoTTIImpl : TargetTransformInfoImplCRTPBase<NoTTIImpl> {
  explicit NoTTIImpl(const DataLayout &DL)
      : TargetTransformInfoImplCRTPBase<NoTTIImpl>(DL) {}
};
} // namespace
42 
43 bool HardwareLoopInfo::isHardwareLoopCandidate(ScalarEvolution &SE,
44                                                LoopInfo &LI, DominatorTree &DT,
45                                                bool ForceNestedLoop,
46                                                bool ForceHardwareLoopPHI) {
47   SmallVector<BasicBlock *, 4> ExitingBlocks;
48   L->getExitingBlocks(ExitingBlocks);
49 
50   for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
51                                                IE = ExitingBlocks.end();
52        I != IE; ++I) {
53     BasicBlock *BB = *I;
54 
55     // If we pass the updated counter back through a phi, we need to know
56     // which latch the updated value will be coming from.
57     if (!L->isLoopLatch(BB)) {
58       if (ForceHardwareLoopPHI || CounterInReg)
59         continue;
60     }
61 
62     const SCEV *EC = SE.getExitCount(L, BB);
63     if (isa<SCEVCouldNotCompute>(EC))
64       continue;
65     if (const SCEVConstant *ConstEC = dyn_cast<SCEVConstant>(EC)) {
66       if (ConstEC->getValue()->isZero())
67         continue;
68     } else if (!SE.isLoopInvariant(EC, L))
69       continue;
70 
71     if (SE.getTypeSizeInBits(EC->getType()) > CountType->getBitWidth())
72       continue;
73 
74     // If this exiting block is contained in a nested loop, it is not eligible
75     // for insertion of the branch-and-decrement since the inner loop would
76     // end up messing up the value in the CTR.
77     if (!IsNestingLegal && LI.getLoopFor(BB) != L && !ForceNestedLoop)
78       continue;
79 
80     // We now have a loop-invariant count of loop iterations (which is not the
81     // constant zero) for which we know that this loop will not exit via this
82     // existing block.
83 
84     // We need to make sure that this block will run on every loop iteration.
85     // For this to be true, we must dominate all blocks with backedges. Such
86     // blocks are in-loop predecessors to the header block.
87     bool NotAlways = false;
88     for (pred_iterator PI = pred_begin(L->getHeader()),
89                        PIE = pred_end(L->getHeader());
90          PI != PIE; ++PI) {
91       if (!L->contains(*PI))
92         continue;
93 
94       if (!DT.dominates(*I, *PI)) {
95         NotAlways = true;
96         break;
97       }
98     }
99 
100     if (NotAlways)
101       continue;
102 
103     // Make sure this blocks ends with a conditional branch.
104     Instruction *TI = BB->getTerminator();
105     if (!TI)
106       continue;
107 
108     if (BranchInst *BI = dyn_cast<BranchInst>(TI)) {
109       if (!BI->isConditional())
110         continue;
111 
112       ExitBranch = BI;
113     } else
114       continue;
115 
116     // Note that this block may not be the loop latch block, even if the loop
117     // has a latch block.
118     ExitBlock = *I;
119     ExitCount = EC;
120     break;
121   }
122 
123   if (!ExitBlock)
124     return false;
125   return true;
126 }
127 
// Default-construct a TargetTransformInfo backed by the no-op implementation;
// used when no target-specific information is available.
TargetTransformInfo::TargetTransformInfo(const DataLayout &DL)
    : TTIImpl(new Model<NoTTIImpl>(NoTTIImpl(DL))) {}

// Out-of-line destructor: anchors the vtable of the type-erased Model.
TargetTransformInfo::~TargetTransformInfo() {}

// Move operations transfer ownership of the underlying implementation.
TargetTransformInfo::TargetTransformInfo(TargetTransformInfo &&Arg)
    : TTIImpl(std::move(Arg.TTIImpl)) {}

TargetTransformInfo &TargetTransformInfo::operator=(TargetTransformInfo &&RHS) {
  TTIImpl = std::move(RHS.TTIImpl);
  return *this;
}
140 
// The cost-query wrappers below delegate to the underlying target
// implementation (TTIImpl) and assert that the result is non-negative, since
// negative costs would break clients of the cost model.
int TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty,
                                          Type *OpTy) const {
  int Cost = TTIImpl->getOperationCost(Opcode, Ty, OpTy);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(FunctionType *FTy, int NumArgs,
                                     const User *U) const {
  int Cost = TTIImpl->getCallCost(FTy, NumArgs, U);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallCost(const Function *F,
                                     ArrayRef<const Value *> Arguments,
                                     const User *U) const {
  int Cost = TTIImpl->getCallCost(F, Arguments, U);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getInliningThresholdMultiplier() const {
  return TTIImpl->getInliningThresholdMultiplier();
}

int TargetTransformInfo::getGEPCost(Type *PointeeType, const Value *Ptr,
                                    ArrayRef<const Value *> Operands) const {
  return TTIImpl->getGEPCost(PointeeType, Ptr, Operands);
}

int TargetTransformInfo::getExtCost(const Instruction *I,
                                    const Value *Src) const {
  return TTIImpl->getExtCost(I, Src);
}

int TargetTransformInfo::getIntrinsicCost(
    Intrinsic::ID IID, Type *RetTy, ArrayRef<const Value *> Arguments,
    const User *U) const {
  int Cost = TTIImpl->getIntrinsicCost(IID, RetTy, Arguments, U);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                                      unsigned &JTSize) const {
  return TTIImpl->getEstimatedNumberOfCaseClusters(SI, JTSize);
}

int TargetTransformInfo::getUserCost(const User *U,
    ArrayRef<const Value *> Operands) const {
  int Cost = TTIImpl->getUserCost(U, Operands);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
197 
// Target-capability and legality queries: thin forwarding wrappers over the
// type-erased target implementation. See TargetTransformInfo.h for the
// documented semantics of each query.
bool TargetTransformInfo::hasBranchDivergence() const {
  return TTIImpl->hasBranchDivergence();
}

bool TargetTransformInfo::isSourceOfDivergence(const Value *V) const {
  return TTIImpl->isSourceOfDivergence(V);
}

bool llvm::TargetTransformInfo::isAlwaysUniform(const Value *V) const {
  return TTIImpl->isAlwaysUniform(V);
}

unsigned TargetTransformInfo::getFlatAddressSpace() const {
  return TTIImpl->getFlatAddressSpace();
}

bool TargetTransformInfo::isLoweredToCall(const Function *F) const {
  return TTIImpl->isLoweredToCall(F);
}

bool TargetTransformInfo::isHardwareLoopProfitable(
  Loop *L, ScalarEvolution &SE, AssumptionCache &AC,
  TargetLibraryInfo *LibInfo, HardwareLoopInfo &HWLoopInfo) const {
  return TTIImpl->isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}

void TargetTransformInfo::getUnrollingPreferences(
    Loop *L, ScalarEvolution &SE, UnrollingPreferences &UP) const {
  return TTIImpl->getUnrollingPreferences(L, SE, UP);
}

bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const {
  return TTIImpl->isLegalAddImmediate(Imm);
}

bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const {
  return TTIImpl->isLegalICmpImmediate(Imm);
}

bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
                                                int64_t BaseOffset,
                                                bool HasBaseReg,
                                                int64_t Scale,
                                                unsigned AddrSpace,
                                                Instruction *I) const {
  return TTIImpl->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                        Scale, AddrSpace, I);
}

bool TargetTransformInfo::isLSRCostLess(LSRCost &C1, LSRCost &C2) const {
  return TTIImpl->isLSRCostLess(C1, C2);
}

bool TargetTransformInfo::canMacroFuseCmp() const {
  return TTIImpl->canMacroFuseCmp();
}

bool TargetTransformInfo::shouldFavorPostInc() const {
  return TTIImpl->shouldFavorPostInc();
}

bool TargetTransformInfo::shouldFavorBackedgeIndex(const Loop *L) const {
  return TTIImpl->shouldFavorBackedgeIndex(L);
}

bool TargetTransformInfo::isLegalMaskedStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedLoad(DataType);
}

// NT = non-temporal (streaming) memory accesses.
bool TargetTransformInfo::isLegalNTStore(Type *DataType,
                                         unsigned Alignment) const {
  return TTIImpl->isLegalNTStore(DataType, Alignment);
}

bool TargetTransformInfo::isLegalNTLoad(Type *DataType,
                                        unsigned Alignment) const {
  return TTIImpl->isLegalNTLoad(DataType, Alignment);
}

bool TargetTransformInfo::isLegalMaskedGather(Type *DataType) const {
  return TTIImpl->isLegalMaskedGather(DataType);
}

bool TargetTransformInfo::isLegalMaskedScatter(Type *DataType) const {
  return TTIImpl->isLegalMaskedScatter(DataType);
}

bool TargetTransformInfo::isLegalMaskedCompressStore(Type *DataType) const {
  return TTIImpl->isLegalMaskedCompressStore(DataType);
}

bool TargetTransformInfo::isLegalMaskedExpandLoad(Type *DataType) const {
  return TTIImpl->isLegalMaskedExpandLoad(DataType);
}

bool TargetTransformInfo::hasDivRemOp(Type *DataType, bool IsSigned) const {
  return TTIImpl->hasDivRemOp(DataType, IsSigned);
}

bool TargetTransformInfo::hasVolatileVariant(Instruction *I,
                                             unsigned AddrSpace) const {
  return TTIImpl->hasVolatileVariant(I, AddrSpace);
}

bool TargetTransformInfo::prefersVectorizedAddressing() const {
  return TTIImpl->prefersVectorizedAddressing();
}

int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
                                              int64_t BaseOffset,
                                              bool HasBaseReg,
                                              int64_t Scale,
                                              unsigned AddrSpace) const {
  int Cost = TTIImpl->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                           Scale, AddrSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

bool TargetTransformInfo::LSRWithInstrQueries() const {
  return TTIImpl->LSRWithInstrQueries();
}

bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const {
  return TTIImpl->isTruncateFree(Ty1, Ty2);
}

bool TargetTransformInfo::isProfitableToHoist(Instruction *I) const {
  return TTIImpl->isProfitableToHoist(I);
}
332 
// More forwarding wrappers: type legality, vectorization feature toggles,
// and immediate/floating-point operation costs.
bool TargetTransformInfo::useAA() const { return TTIImpl->useAA(); }

bool TargetTransformInfo::isTypeLegal(Type *Ty) const {
  return TTIImpl->isTypeLegal(Ty);
}

unsigned TargetTransformInfo::getJumpBufAlignment() const {
  return TTIImpl->getJumpBufAlignment();
}

unsigned TargetTransformInfo::getJumpBufSize() const {
  return TTIImpl->getJumpBufSize();
}

bool TargetTransformInfo::shouldBuildLookupTables() const {
  return TTIImpl->shouldBuildLookupTables();
}
bool TargetTransformInfo::shouldBuildLookupTablesForConstant(Constant *C) const {
  return TTIImpl->shouldBuildLookupTablesForConstant(C);
}

bool TargetTransformInfo::useColdCCForColdCall(Function &F) const {
  return TTIImpl->useColdCCForColdCall(F);
}

unsigned TargetTransformInfo::
getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const {
  return TTIImpl->getScalarizationOverhead(Ty, Insert, Extract);
}

unsigned TargetTransformInfo::
getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                 unsigned VF) const {
  return TTIImpl->getOperandsScalarizationOverhead(Args, VF);
}

bool TargetTransformInfo::supportsEfficientVectorElementLoadStore() const {
  return TTIImpl->supportsEfficientVectorElementLoadStore();
}

bool TargetTransformInfo::enableAggressiveInterleaving(bool LoopHasReductions) const {
  return TTIImpl->enableAggressiveInterleaving(LoopHasReductions);
}

// Returns null when memcmp expansion is disabled for this target.
const TargetTransformInfo::MemCmpExpansionOptions *
TargetTransformInfo::enableMemCmpExpansion(bool IsZeroCmp) const {
  return TTIImpl->enableMemCmpExpansion(IsZeroCmp);
}

bool TargetTransformInfo::enableInterleavedAccessVectorization() const {
  return TTIImpl->enableInterleavedAccessVectorization();
}

bool TargetTransformInfo::enableMaskedInterleavedAccessVectorization() const {
  return TTIImpl->enableMaskedInterleavedAccessVectorization();
}

bool TargetTransformInfo::isFPVectorizationPotentiallyUnsafe() const {
  return TTIImpl->isFPVectorizationPotentiallyUnsafe();
}

bool TargetTransformInfo::allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                                         unsigned BitWidth,
                                                         unsigned AddressSpace,
                                                         unsigned Alignment,
                                                         bool *Fast) const {
  return TTIImpl->allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                                 Alignment, Fast);
}

TargetTransformInfo::PopcntSupportKind
TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const {
  return TTIImpl->getPopcntSupport(IntTyWidthInBit);
}

bool TargetTransformInfo::haveFastSqrt(Type *Ty) const {
  return TTIImpl->haveFastSqrt(Ty);
}

bool TargetTransformInfo::isFCmpOrdCheaperThanFCmpZero(Type *Ty) const {
  return TTIImpl->isFCmpOrdCheaperThanFCmpZero(Ty);
}

int TargetTransformInfo::getFPOpCost(Type *Ty) const {
  int Cost = TTIImpl->getFPOpCost(Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx,
                                               const APInt &Imm,
                                               Type *Ty) const {
  int Cost = TTIImpl->getIntImmCodeSizeCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

// Overload: cost of materializing Imm as operand Idx of the given opcode.
int TargetTransformInfo::getIntImmCost(unsigned Opcode, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(Opcode, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

// Overload: cost of materializing Imm as operand Idx of the given intrinsic.
int TargetTransformInfo::getIntImmCost(Intrinsic::ID IID, unsigned Idx,
                                       const APInt &Imm, Type *Ty) const {
  int Cost = TTIImpl->getIntImmCost(IID, Idx, Imm, Ty);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}
449 
// Register-file, cache, and prefetch parameter queries; all forward to the
// target implementation.
unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const {
  return TTIImpl->getNumberOfRegisters(Vector);
}

unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const {
  return TTIImpl->getRegisterBitWidth(Vector);
}

unsigned TargetTransformInfo::getMinVectorRegisterBitWidth() const {
  return TTIImpl->getMinVectorRegisterBitWidth();
}

bool TargetTransformInfo::shouldMaximizeVectorBandwidth(bool OptSize) const {
  return TTIImpl->shouldMaximizeVectorBandwidth(OptSize);
}

unsigned TargetTransformInfo::getMinimumVF(unsigned ElemWidth) const {
  return TTIImpl->getMinimumVF(ElemWidth);
}

bool TargetTransformInfo::shouldConsiderAddressTypePromotion(
    const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const {
  return TTIImpl->shouldConsiderAddressTypePromotion(
      I, AllowPromotionWithoutCommonHeader);
}

unsigned TargetTransformInfo::getCacheLineSize() const {
  return TTIImpl->getCacheLineSize();
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheSize(CacheLevel Level)
  const {
  return TTIImpl->getCacheSize(Level);
}

llvm::Optional<unsigned> TargetTransformInfo::getCacheAssociativity(
  CacheLevel Level) const {
  return TTIImpl->getCacheAssociativity(Level);
}

unsigned TargetTransformInfo::getPrefetchDistance() const {
  return TTIImpl->getPrefetchDistance();
}

unsigned TargetTransformInfo::getMinPrefetchStride() const {
  return TTIImpl->getMinPrefetchStride();
}

unsigned TargetTransformInfo::getMaxPrefetchIterationsAhead() const {
  return TTIImpl->getMaxPrefetchIterationsAhead();
}

unsigned TargetTransformInfo::getMaxInterleaveFactor(unsigned VF) const {
  return TTIImpl->getMaxInterleaveFactor(VF);
}
505 
TargetTransformInfo::OperandValueKind
TargetTransformInfo::getOperandInfo(Value *V, OperandValueProperties &OpProps) {
  // Classify how much the cost model knows about operand V: an arbitrary
  // value, a uniform (splat) value, or a uniform/non-uniform constant.
  // OpProps additionally reports whether the constant(s) are powers of two.
  // Note the assignments below deliberately refine OpInfo in order of
  // increasing specificity, so later checks may overwrite earlier ones.
  OperandValueKind OpInfo = OK_AnyValue;
  OpProps = OP_None;

  // A scalar integer constant is the most specific case; return immediately.
  if (auto *CI = dyn_cast<ConstantInt>(V)) {
    if (CI->getValue().isPowerOf2())
      OpProps = OP_PowerOf2;
    return OK_UniformConstantValue;
  }

  // A broadcast shuffle creates a uniform value.
  // TODO: Add support for non-zero index broadcasts.
  // TODO: Add support for different source vector width.
  if (auto *ShuffleInst = dyn_cast<ShuffleVectorInst>(V))
    if (ShuffleInst->isZeroEltSplat())
      OpInfo = OK_UniformValue;

  const Value *Splat = getSplatValue(V);

  // Check for a splat of a constant or for a non uniform vector of constants
  // and check if the constant(s) are all powers of two.
  if (isa<ConstantVector>(V) || isa<ConstantDataVector>(V)) {
    OpInfo = OK_NonUniformConstantValue;
    if (Splat) {
      OpInfo = OK_UniformConstantValue;
      if (auto *CI = dyn_cast<ConstantInt>(Splat))
        if (CI->getValue().isPowerOf2())
          OpProps = OP_PowerOf2;
    } else if (auto *CDS = dyn_cast<ConstantDataSequential>(V)) {
      // Assume all elements are powers of two, and clear the property as
      // soon as one element (or a non-ConstantInt element) disproves it.
      OpProps = OP_PowerOf2;
      for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) {
        if (auto *CI = dyn_cast<ConstantInt>(CDS->getElementAsConstant(I)))
          if (CI->getValue().isPowerOf2())
            continue;
        OpProps = OP_None;
        break;
      }
    }
  }

  // Check for a splat of a uniform value. This is not loop aware, so return
  // true only for the obviously uniform cases (argument, globalvalue)
  if (Splat && (isa<Argument>(Splat) || isa<GlobalValue>(Splat)))
    OpInfo = OK_UniformValue;

  return OpInfo;
}
554 
// Per-instruction cost queries. Each wrapper forwards to the target
// implementation and asserts the cost is non-negative. Where an optional
// Instruction* context is accepted, its opcode must match the Opcode
// argument (checked by assertion).
int TargetTransformInfo::getArithmeticInstrCost(
    unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
    OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
    OperandValueProperties Opd2PropInfo,
    ArrayRef<const Value *> Args) const {
  int Cost = TTIImpl->getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                             Opd1PropInfo, Opd2PropInfo, Args);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Ty, int Index,
                                        Type *SubTp) const {
  int Cost = TTIImpl->getShuffleCost(Kind, Ty, Index, SubTp);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst,
                                 Type *Src, const Instruction *I) const {
  assert ((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCastInstrCost(Opcode, Dst, Src, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                                  VectorType *VecTy,
                                                  unsigned Index) const {
  int Cost = TTIImpl->getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCFInstrCost(unsigned Opcode) const {
  int Cost = TTIImpl->getCFInstrCost(Opcode);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
                                 Type *CondTy, const Instruction *I) const {
  assert ((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val,
                                            unsigned Index) const {
  int Cost = TTIImpl->getVectorInstrCost(Opcode, Val, Index);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src,
                                         unsigned Alignment,
                                         unsigned AddressSpace,
                                         const Instruction *I) const {
  assert ((I == nullptr || I->getOpcode() == Opcode) &&
          "Opcode should reflect passed instruction.");
  int Cost = TTIImpl->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
                                               unsigned Alignment,
                                               unsigned AddressSpace) const {
  int Cost =
      TTIImpl->getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                                Value *Ptr, bool VariableMask,
                                                unsigned Alignment) const {
  int Cost = TTIImpl->getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                             Alignment);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getInterleavedMemoryOpCost(
    unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
    unsigned Alignment, unsigned AddressSpace, bool UseMaskForCond,
    bool UseMaskForGaps) const {
  int Cost = TTIImpl->getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                                 Alignment, AddressSpace,
                                                 UseMaskForCond,
                                                 UseMaskForGaps);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

// Overload taking argument types (scalarized form).
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                    ArrayRef<Type *> Tys, FastMathFlags FMF,
                                    unsigned ScalarizationCostPassed) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                            ScalarizationCostPassed);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

// Overload taking argument values and a vectorization factor.
int TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
           ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) const {
  int Cost = TTIImpl->getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getCallInstrCost(Function *F, Type *RetTy,
                                          ArrayRef<Type *> Tys) const {
  int Cost = TTIImpl->getCallInstrCost(F, RetTy, Tys);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const {
  return TTIImpl->getNumberOfParts(Tp);
}

int TargetTransformInfo::getAddressComputationCost(Type *Tp,
                                                   ScalarEvolution *SE,
                                                   const SCEV *Ptr) const {
  int Cost = TTIImpl->getAddressComputationCost(Tp, SE, Ptr);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMemcpyCost(const Instruction *I) const {
  int Cost = TTIImpl->getMemcpyCost(I);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                                    bool IsPairwiseForm) const {
  int Cost = TTIImpl->getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

int TargetTransformInfo::getMinMaxReductionCost(Type *Ty, Type *CondTy,
                                                bool IsPairwiseForm,
                                                bool IsUnsigned) const {
  int Cost =
      TTIImpl->getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  assert(Cost >= 0 && "TTI should not produce negative costs!");
  return Cost;
}

unsigned
TargetTransformInfo::getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const {
  return TTIImpl->getCostOfKeepingLiveOverCall(Tys);
}

bool TargetTransformInfo::getTgtMemIntrinsic(IntrinsicInst *Inst,
                                             MemIntrinsicInfo &Info) const {
  return TTIImpl->getTgtMemIntrinsic(Inst, Info);
}

unsigned TargetTransformInfo::getAtomicMemIntrinsicMaxElementSize() const {
  return TTIImpl->getAtomicMemIntrinsicMaxElementSize();
}
723 
724 Value *TargetTransformInfo::getOrCreateResultFromMemIntrinsic(
725     IntrinsicInst *Inst, Type *ExpectedType) const {
726   return TTIImpl->getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
727 }
728 
729 Type *TargetTransformInfo::getMemcpyLoopLoweringType(LLVMContext &Context,
730                                                      Value *Length,
731                                                      unsigned SrcAlign,
732                                                      unsigned DestAlign) const {
733   return TTIImpl->getMemcpyLoopLoweringType(Context, Length, SrcAlign,
734                                             DestAlign);
735 }
736 
737 void TargetTransformInfo::getMemcpyLoopResidualLoweringType(
738     SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
739     unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const {
740   TTIImpl->getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
741                                              SrcAlign, DestAlign);
742 }
743 
744 bool TargetTransformInfo::areInlineCompatible(const Function *Caller,
745                                               const Function *Callee) const {
746   return TTIImpl->areInlineCompatible(Caller, Callee);
747 }
748 
749 bool TargetTransformInfo::areFunctionArgsABICompatible(
750     const Function *Caller, const Function *Callee,
751     SmallPtrSetImpl<Argument *> &Args) const {
752   return TTIImpl->areFunctionArgsABICompatible(Caller, Callee, Args);
753 }
754 
755 bool TargetTransformInfo::isIndexedLoadLegal(MemIndexedMode Mode,
756                                              Type *Ty) const {
757   return TTIImpl->isIndexedLoadLegal(Mode, Ty);
758 }
759 
760 bool TargetTransformInfo::isIndexedStoreLegal(MemIndexedMode Mode,
761                                               Type *Ty) const {
762   return TTIImpl->isIndexedStoreLegal(Mode, Ty);
763 }
764 
765 unsigned TargetTransformInfo::getLoadStoreVecRegBitWidth(unsigned AS) const {
766   return TTIImpl->getLoadStoreVecRegBitWidth(AS);
767 }
768 
769 bool TargetTransformInfo::isLegalToVectorizeLoad(LoadInst *LI) const {
770   return TTIImpl->isLegalToVectorizeLoad(LI);
771 }
772 
773 bool TargetTransformInfo::isLegalToVectorizeStore(StoreInst *SI) const {
774   return TTIImpl->isLegalToVectorizeStore(SI);
775 }
776 
777 bool TargetTransformInfo::isLegalToVectorizeLoadChain(
778     unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
779   return TTIImpl->isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
780                                               AddrSpace);
781 }
782 
783 bool TargetTransformInfo::isLegalToVectorizeStoreChain(
784     unsigned ChainSizeInBytes, unsigned Alignment, unsigned AddrSpace) const {
785   return TTIImpl->isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
786                                                AddrSpace);
787 }
788 
// Preferred vectorization factor for a load chain; forwarded to the target.
unsigned TargetTransformInfo::getLoadVectorFactor(unsigned VF,
                                                  unsigned LoadSize,
                                                  unsigned ChainSizeInBytes,
                                                  VectorType *VecTy) const {
  return TTIImpl->getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
}
795 
// Preferred vectorization factor for a store chain; forwarded to the target.
unsigned TargetTransformInfo::getStoreVectorFactor(unsigned VF,
                                                   unsigned StoreSize,
                                                   unsigned ChainSizeInBytes,
                                                   VectorType *VecTy) const {
  return TTIImpl->getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
}
802 
// Whether the target prefers a reduction intrinsic for this opcode/type
// combination; forwarded to the target-specific implementation.
bool TargetTransformInfo::useReductionIntrinsic(unsigned Opcode,
                                                Type *Ty, ReductionFlags Flags) const {
  return TTIImpl->useReductionIntrinsic(Opcode, Ty, Flags);
}
807 
// Whether the given reduction intrinsic should be expanded; forwarded to the
// target-specific implementation.
bool TargetTransformInfo::shouldExpandReduction(const IntrinsicInst *II) const {
  return TTIImpl->shouldExpandReduction(II);
}
811 
// GlobalISel rematerialization cost for globals; forwarded to the target.
unsigned TargetTransformInfo::getGISelRematGlobalCost() const {
  return TTIImpl->getGISelRematGlobalCost();
}
815 
// Latency-based cost of \p I; forwarded to the target-specific implementation.
int TargetTransformInfo::getInstructionLatency(const Instruction *I) const {
  return TTIImpl->getInstructionLatency(I);
}
819 
820 static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft,
821                                      unsigned Level) {
822   // We don't need a shuffle if we just want to have element 0 in position 0 of
823   // the vector.
824   if (!SI && Level == 0 && IsLeft)
825     return true;
826   else if (!SI)
827     return false;
828 
829   SmallVector<int, 32> Mask(SI->getType()->getVectorNumElements(), -1);
830 
831   // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether
832   // we look at the left or right side.
833   for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2)
834     Mask[i] = val;
835 
836   SmallVector<int, 16> ActualMask = SI->getShuffleMask();
837   return Mask == ActualMask;
838 }
839 
namespace {
/// Kind of the reduction data.
enum ReductionKind {
  RK_None,           ///< Not a reduction.
  RK_Arithmetic,     ///< Binary reduction data.
  RK_MinMax,         ///< Min/max reduction data.
  RK_UnsignedMinMax, ///< Unsigned min/max reduction data.
};
/// Contains opcode + LHS/RHS parts of the reduction operations.
struct ReductionData {
  ReductionData() = delete;
  ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
      : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
    assert(Kind != RK_None && "expected binary or min/max reduction only.");
  }
  unsigned Opcode = 0;          ///< Instruction opcode of the reduction step
                                ///< (the compare's opcode for min/max).
  Value *LHS = nullptr;         ///< Left operand of the reduction operation.
  Value *RHS = nullptr;         ///< Right operand of the reduction operation.
  ReductionKind Kind = RK_None; ///< Kind of this reduction.
  /// Return true if \p RD has the same kind and opcode as this reduction.
  bool hasSameData(ReductionData &RD) const {
    return Kind == RD.Kind && Opcode == RD.Opcode;
  }
};
} // namespace
864 
/// Decompose \p I into reduction-step data: any binary operator counts as an
/// arithmetic step, and a select fed by a compare is matched against the
/// known min/max patterns. Returns llvm::None if \p I is neither.
static Optional<ReductionData> getReductionData(Instruction *I) {
  Value *L, *R;
  if (m_BinOp(m_Value(L), m_Value(R)).match(I))
    return ReductionData(RK_Arithmetic, I->getOpcode(), L, R);
  if (auto *SI = dyn_cast<SelectInst>(I)) {
    // Signed integer min/max and ordered/unordered FP min/max all map to
    // RK_MinMax.
    if (m_SMin(m_Value(L), m_Value(R)).match(SI) ||
        m_SMax(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_OrdFMax(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UnordFMax(m_Value(L), m_Value(R)).match(SI)) {
      // For min/max the recorded opcode is the compare's opcode (ICmp/FCmp),
      // not the select's.
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_MinMax, CI->getOpcode(), L, R);
    }
    // Unsigned integer min/max is tracked with its own kind.
    if (m_UMin(m_Value(L), m_Value(R)).match(SI) ||
        m_UMax(m_Value(L), m_Value(R)).match(SI)) {
      auto *CI = cast<CmpInst>(SI->getCondition());
      return ReductionData(RK_UnsignedMinMax, CI->getOpcode(), L, R);
    }
  }
  return llvm::None;
}
887 
/// Match one level of a pairwise reduction rooted at \p I, then recurse into
/// the operation feeding it until \p NumLevels levels have been matched.
/// Returns the reduction kind on success, RK_None otherwise.
static ReductionKind matchPairwiseReductionAtLevel(Instruction *I,
                                                   unsigned Level,
                                                   unsigned NumLevels) {
  // Match one level of pairwise operations.
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  if (!I)
    return RK_None;

  assert(I->getType()->isVectorTy() && "Expecting a vector type");

  Optional<ReductionData> RD = getReductionData(I);
  if (!RD)
    return RK_None;

  // Above level 0 both operands must be fed by shufflevectors.
  ShuffleVectorInst *LS = dyn_cast<ShuffleVectorInst>(RD->LHS);
  if (!LS && Level)
    return RK_None;
  ShuffleVectorInst *RS = dyn_cast<ShuffleVectorInst>(RD->RHS);
  if (!RS && Level)
    return RK_None;

  // On level 0 we can omit one shufflevector instruction.
  if (!Level && !RS && !LS)
    return RK_None;

  // Shuffle inputs must match.
  Value *NextLevelOpL = LS ? LS->getOperand(0) : nullptr;
  Value *NextLevelOpR = RS ? RS->getOperand(0) : nullptr;
  Value *NextLevelOp = nullptr;
  if (NextLevelOpR && NextLevelOpL) {
    // If we have two shuffles their operands must match.
    if (NextLevelOpL != NextLevelOpR)
      return RK_None;

    NextLevelOp = NextLevelOpL;
  } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) {
    // On the first level we can omit the shufflevector <0, undef,...>. So the
    // input to the other shufflevector <1, undef> must match with one of the
    // inputs to the current binary operation.
    // Example:
    //  %NextLevelOpL = shufflevector %R, <1, undef ...>
    //  %BinOp        = fadd          %NextLevelOpL, %R
    if (NextLevelOpL && NextLevelOpL != RD->RHS)
      return RK_None;
    else if (NextLevelOpR && NextLevelOpR != RD->LHS)
      return RK_None;

    NextLevelOp = NextLevelOpL ? RD->RHS : RD->LHS;
  } else
    return RK_None;

  // Check that the next levels binary operation exists and matches with the
  // current one.
  if (Level + 1 != NumLevels) {
    Optional<ReductionData> NextLevelRD =
        getReductionData(cast<Instruction>(NextLevelOp));
    if (!NextLevelRD || !RD->hasSameData(*NextLevelRD))
      return RK_None;
  }

  // Shuffle mask for pairwise operation must match. Either operand may take
  // the "left" role, but the other must then be the matching "right" mask.
  if (matchPairwiseShuffleMask(LS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(RS, /*IsLeft=*/false, Level))
      return RK_None;
  } else if (matchPairwiseShuffleMask(RS, /*IsLeft=*/true, Level)) {
    if (!matchPairwiseShuffleMask(LS, /*IsLeft=*/false, Level))
      return RK_None;
  } else {
    return RK_None;
  }

  if (++Level == NumLevels)
    return RD->Kind;

  // Match next level.
  return matchPairwiseReductionAtLevel(cast<Instruction>(NextLevelOp), Level,
                                       NumLevels);
}
970 
/// Match a complete pairwise reduction tree feeding the extract of element 0
/// at \p ReduxRoot (only when -costmodel-reduxcost is enabled). On success,
/// set \p Opcode and \p Ty from the matched reduction and return its kind.
static ReductionKind matchPairwiseReduction(const ExtractElementInst *ReduxRoot,
                                            unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  // A full tree needs a power-of-2 element count; Log2_32 below gives the
  // number of levels to match.
  Type *VecTy = RdxStart->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffle,shuffle,add triples like the following
  // that builds a pairwise reduction tree.
  //
  //  (X0, X1, X2, X3)
  //   (X0 + X1, X2 + X3, undef, undef)
  //    ((X0 + X1) + (X2 + X3), undef, undef, undef)
  //
  // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 2 , i32 undef, i32 undef>
  // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 3, i32 undef, i32 undef>
  // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1
  // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 0, i32 undef, i32 undef, i32 undef>
  // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef,
  //       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1
  // %r = extractelement <4 x float> %bin.rdx8, i32 0
  if (matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems)) ==
      RK_None)
    return RK_None;

  Opcode = RD->Opcode;
  Ty = VecTy;

  return RD->Kind;
}
1023 
1024 static std::pair<Value *, ShuffleVectorInst *>
1025 getShuffleAndOtherOprd(Value *L, Value *R) {
1026   ShuffleVectorInst *S = nullptr;
1027 
1028   if ((S = dyn_cast<ShuffleVectorInst>(L)))
1029     return std::make_pair(R, S);
1030 
1031   S = dyn_cast<ShuffleVectorInst>(R);
1032   return std::make_pair(L, S);
1033 }
1034 
/// Match a vector-splitting reduction (shuffle-half + op at each step)
/// feeding the extract of element 0 at \p ReduxRoot (only when
/// -costmodel-reduxcost is enabled). On success, set \p Opcode and \p Ty and
/// return the reduction kind.
static ReductionKind
matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot,
                              unsigned &Opcode, Type *&Ty) {
  if (!EnableReduxCost)
    return RK_None;

  // Need to extract the first element.
  ConstantInt *CI = dyn_cast<ConstantInt>(ReduxRoot->getOperand(1));
  unsigned Idx = ~0u;
  if (CI)
    Idx = CI->getZExtValue();
  if (Idx != 0)
    return RK_None;

  auto *RdxStart = dyn_cast<Instruction>(ReduxRoot->getOperand(0));
  if (!RdxStart)
    return RK_None;
  Optional<ReductionData> RD = getReductionData(RdxStart);
  if (!RD)
    return RK_None;

  // The element count is halved each step, so it must be a power of two.
  Type *VecTy = ReduxRoot->getOperand(0)->getType();
  unsigned NumVecElems = VecTy->getVectorNumElements();
  if (!isPowerOf2_32(NumVecElems))
    return RK_None;

  // We look for a sequence of shuffles and adds like the following matching one
  // fadd, shuffle vector pair at a time.
  //
  // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef,
  //                           <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf
  // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef,
  //                          <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7
  // %r = extractelement <4 x float> %bin.rdx8, i32 0

  unsigned MaskStart = 1;
  Instruction *RdxOp = RdxStart;
  SmallVector<int, 32> ShuffleMask(NumVecElems, 0);
  unsigned NumVecElemsRemain = NumVecElems;
  // Walk down the chain, halving the live element count until one remains.
  while (NumVecElemsRemain - 1) {
    // Check for the right reduction operation.
    if (!RdxOp)
      return RK_None;
    Optional<ReductionData> RDLevel = getReductionData(RdxOp);
    if (!RDLevel || !RDLevel->hasSameData(*RD))
      return RK_None;

    Value *NextRdxOp;
    ShuffleVectorInst *Shuffle;
    std::tie(NextRdxOp, Shuffle) =
        getShuffleAndOtherOprd(RDLevel->LHS, RDLevel->RHS);

    // Check the current reduction operation and the shuffle use the same value.
    if (Shuffle == nullptr)
      return RK_None;
    if (Shuffle->getOperand(0) != NextRdxOp)
      return RK_None;

    // Check that shuffle masks matches: the upper half of the live elements
    // is moved into positions 0..MaskStart-1.
    for (unsigned j = 0; j != MaskStart; ++j)
      ShuffleMask[j] = MaskStart + j;
    // Fill the rest of the mask with -1 for undef.
    std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1);

    SmallVector<int, 16> Mask = Shuffle->getShuffleMask();
    if (ShuffleMask != Mask)
      return RK_None;

    RdxOp = dyn_cast<Instruction>(NextRdxOp);
    NumVecElemsRemain /= 2;
    MaskStart *= 2;
  }

  Opcode = RD->Opcode;
  Ty = VecTy;
  return RD->Kind;
}
1114 
/// Estimate the reciprocal-throughput cost of \p I by dispatching on its
/// opcode to the matching TTI cost hook. Returns -1 when no cost information
/// is available for the instruction.
int TargetTransformInfo::getInstructionThroughput(const Instruction *I) const {
  switch (I->getOpcode()) {
  case Instruction::GetElementPtr:
    return getUserCost(I);

  // Pure control flow.
  case Instruction::Ret:
  case Instruction::PHI:
  case Instruction::Br: {
    return getCFInstrCost(I->getOpcode());
  }
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::FDiv:
  case Instruction::URem:
  case Instruction::SRem:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Operand kinds/properties (uniform, power-of-2, ...) refine the cost of
    // binary operators.
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = getOperandInfo(I->getOperand(1), Op2VP);
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::FNeg: {
    // FNeg is unary; synthesize "any value" info for the missing second
    // operand so the same arithmetic cost hook can be used.
    TargetTransformInfo::OperandValueKind Op1VK, Op2VK;
    TargetTransformInfo::OperandValueProperties Op1VP, Op2VP;
    Op1VK = getOperandInfo(I->getOperand(0), Op1VP);
    Op2VK = OK_AnyValue;
    Op2VP = OP_None;
    SmallVector<const Value *, 2> Operands(I->operand_values());
    return getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, Op2VK,
                                  Op1VP, Op2VP, Operands);
  }
  case Instruction::Select: {
    const SelectInst *SI = cast<SelectInst>(I);
    Type *CondTy = SI->getCondition()->getType();
    return getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy, I);
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    Type *ValTy = I->getOperand(0)->getType();
    return getCmpSelInstrCost(I->getOpcode(), ValTy, I->getType(), I);
  }
  case Instruction::Store: {
    // Cost is based on the stored value's type, not the pointer's.
    const StoreInst *SI = cast<StoreInst>(I);
    Type *ValTy = SI->getValueOperand()->getType();
    return getMemoryOpCost(I->getOpcode(), ValTy,
                                SI->getAlignment(),
                                SI->getPointerAddressSpace(), I);
  }
  case Instruction::Load: {
    const LoadInst *LI = cast<LoadInst>(I);
    return getMemoryOpCost(I->getOpcode(), I->getType(),
                                LI->getAlignment(),
                                LI->getPointerAddressSpace(), I);
  }
  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast:
  case Instruction::AddrSpaceCast: {
    Type *SrcTy = I->getOperand(0)->getType();
    return getCastInstrCost(I->getOpcode(), I->getType(), SrcTy, I);
  }
  case Instruction::ExtractElement: {
    const ExtractElementInst * EEI = cast<ExtractElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(1));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();

    // Try to match a reduction sequence (series of shufflevector and vector
    // adds followed by a extractelement).
    unsigned ReduxOpCode;
    Type *ReduxType;

    switch (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                             /*IsPairwiseForm=*/false);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/false, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    // Fall back to the pairwise form of the reduction tree.
    switch (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) {
    case RK_Arithmetic:
      return getArithmeticReductionCost(ReduxOpCode, ReduxType,
                                             /*IsPairwiseForm=*/true);
    case RK_MinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/false);
    case RK_UnsignedMinMax:
      return getMinMaxReductionCost(
          ReduxType, CmpInst::makeCmpResultType(ReduxType),
          /*IsPairwiseForm=*/true, /*IsUnsigned=*/true);
    case RK_None:
      break;
    }

    // Not part of a reduction: cost the plain element extract.
    return getVectorInstrCost(I->getOpcode(),
                                   EEI->getOperand(0)->getType(), Idx);
  }
  case Instruction::InsertElement: {
    const InsertElementInst * IE = cast<InsertElementInst>(I);
    ConstantInt *CI = dyn_cast<ConstantInt>(IE->getOperand(2));
    unsigned Idx = -1;
    if (CI)
      Idx = CI->getZExtValue();
    return getVectorInstrCost(I->getOpcode(),
                                   IE->getType(), Idx);
  }
  case Instruction::ShuffleVector: {
    const ShuffleVectorInst *Shuffle = cast<ShuffleVectorInst>(I);
    Type *Ty = Shuffle->getType();
    Type *SrcTy = Shuffle->getOperand(0)->getType();

    // TODO: Identify and add costs for insert subvector, etc.
    int SubIndex;
    if (Shuffle->isExtractSubvectorMask(SubIndex))
      return TTIImpl->getShuffleCost(SK_ExtractSubvector, SrcTy, SubIndex, Ty);

    if (Shuffle->changesLength())
      return -1;

    if (Shuffle->isIdentity())
      return 0;

    if (Shuffle->isReverse())
      return TTIImpl->getShuffleCost(SK_Reverse, Ty, 0, nullptr);

    if (Shuffle->isSelect())
      return TTIImpl->getShuffleCost(SK_Select, Ty, 0, nullptr);

    if (Shuffle->isTranspose())
      return TTIImpl->getShuffleCost(SK_Transpose, Ty, 0, nullptr);

    if (Shuffle->isZeroEltSplat())
      return TTIImpl->getShuffleCost(SK_Broadcast, Ty, 0, nullptr);

    if (Shuffle->isSingleSource())
      return TTIImpl->getShuffleCost(SK_PermuteSingleSrc, Ty, 0, nullptr);

    return TTIImpl->getShuffleCost(SK_PermuteTwoSrc, Ty, 0, nullptr);
  }
  case Instruction::Call:
    // Only intrinsic calls can be costed; other calls are unknown.
    if (const IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
      SmallVector<Value *, 4> Args(II->arg_operands());

      FastMathFlags FMF;
      if (auto *FPMO = dyn_cast<FPMathOperator>(II))
        FMF = FPMO->getFastMathFlags();

      return getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(),
                                        Args, FMF);
    }
    return -1;
  default:
    // We don't have any information on this instruction.
    return -1;
  }
}
1306 
1307 TargetTransformInfo::Concept::~Concept() {}
1308 
1309 TargetIRAnalysis::TargetIRAnalysis() : TTICallback(&getDefaultTTI) {}
1310 
// Construct with a caller-supplied callback that builds the per-function TTI.
TargetIRAnalysis::TargetIRAnalysis(
    std::function<Result(const Function &)> TTICallback)
    : TTICallback(std::move(TTICallback)) {}
1314 
// Produce the TTI result for \p F. The analysis manager argument is unused.
TargetIRAnalysis::Result TargetIRAnalysis::run(const Function &F,
                                               FunctionAnalysisManager &) {
  return TTICallback(F);
}
1319 
1320 AnalysisKey TargetIRAnalysis::Key;
1321 
// Fallback callback: build the target-independent (NoTTIImpl-backed) result
// from the module's data layout.
TargetIRAnalysis::Result TargetIRAnalysis::getDefaultTTI(const Function &F) {
  return Result(F.getParent()->getDataLayout());
}
1325 
// Register the basic pass with the legacy pass manager.
INITIALIZE_PASS(TargetTransformInfoWrapperPass, "tti",
                "Target Transform Information", false, true)
char TargetTransformInfoWrapperPass::ID = 0;
1330 
1331 void TargetTransformInfoWrapperPass::anchor() {}
1332 
// Default constructor: uses a default-constructed TargetIRAnalysis and
// registers the pass with the global PassRegistry.
TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass()
    : ImmutablePass(ID) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}
1338 
// Construct around an explicit TargetIRAnalysis and register the pass with
// the global PassRegistry.
TargetTransformInfoWrapperPass::TargetTransformInfoWrapperPass(
    TargetIRAnalysis TIRA)
    : ImmutablePass(ID), TIRA(std::move(TIRA)) {
  initializeTargetTransformInfoWrapperPassPass(
      *PassRegistry::getPassRegistry());
}
1345 
// Recompute the TTI for \p F on each call. TargetIRAnalysis::run ignores its
// analysis-manager argument, so a throwaway FunctionAnalysisManager suffices.
// The result is stored in the TTI member so the returned reference stays
// valid after this call.
TargetTransformInfo &TargetTransformInfoWrapperPass::getTTI(const Function &F) {
  FunctionAnalysisManager DummyFAM;
  TTI = TIRA.run(F, DummyFAM);
  return *TTI;
}
1351 
// Factory for the legacy wrapper pass; ownership passes to the caller.
ImmutablePass *
llvm::createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA) {
  return new TargetTransformInfoWrapperPass(std::move(TIRA));
}
1356