//===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements the InstCombine hooks of the TargetTransformInfo
// implementation specific to the AMDGPU target machine (GCNTTIImpl). It uses
// the target's detailed information to simplify calls to AMDGPU intrinsics,
// while letting the target independent and default TTI implementations handle
// the rest.
//
//===----------------------------------------------------------------------===//
162a6c8715SSebastian Neubauer 
176a87e9b0Sdfukalov #include "AMDGPUInstrInfo.h"
182a6c8715SSebastian Neubauer #include "AMDGPUTargetTransformInfo.h"
19560d7e04Sdfukalov #include "GCNSubtarget.h"
206a87e9b0Sdfukalov #include "llvm/IR/IntrinsicsAMDGPU.h"
212a6c8715SSebastian Neubauer #include "llvm/Transforms/InstCombine/InstCombiner.h"
222a6c8715SSebastian Neubauer 
232a6c8715SSebastian Neubauer using namespace llvm;
242a6c8715SSebastian Neubauer 
252a6c8715SSebastian Neubauer #define DEBUG_TYPE "AMDGPUtti"
262a6c8715SSebastian Neubauer 
272a6c8715SSebastian Neubauer namespace {
282a6c8715SSebastian Neubauer 
292a6c8715SSebastian Neubauer struct AMDGPUImageDMaskIntrinsic {
302a6c8715SSebastian Neubauer   unsigned Intr;
312a6c8715SSebastian Neubauer };
322a6c8715SSebastian Neubauer 
332a6c8715SSebastian Neubauer #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
342a6c8715SSebastian Neubauer #include "InstCombineTables.inc"
352a6c8715SSebastian Neubauer 
362a6c8715SSebastian Neubauer } // end anonymous namespace
372a6c8715SSebastian Neubauer 
382a6c8715SSebastian Neubauer // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
392a6c8715SSebastian Neubauer //
402a6c8715SSebastian Neubauer // A single NaN input is folded to minnum, so we rely on that folding for
412a6c8715SSebastian Neubauer // handling NaNs.
fmed3AMDGCN(const APFloat & Src0,const APFloat & Src1,const APFloat & Src2)422a6c8715SSebastian Neubauer static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
432a6c8715SSebastian Neubauer                            const APFloat &Src2) {
442a6c8715SSebastian Neubauer   APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
452a6c8715SSebastian Neubauer 
462a6c8715SSebastian Neubauer   APFloat::cmpResult Cmp0 = Max3.compare(Src0);
472a6c8715SSebastian Neubauer   assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
482a6c8715SSebastian Neubauer   if (Cmp0 == APFloat::cmpEqual)
492a6c8715SSebastian Neubauer     return maxnum(Src1, Src2);
502a6c8715SSebastian Neubauer 
512a6c8715SSebastian Neubauer   APFloat::cmpResult Cmp1 = Max3.compare(Src1);
522a6c8715SSebastian Neubauer   assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
532a6c8715SSebastian Neubauer   if (Cmp1 == APFloat::cmpEqual)
542a6c8715SSebastian Neubauer     return maxnum(Src0, Src2);
552a6c8715SSebastian Neubauer 
562a6c8715SSebastian Neubauer   return maxnum(Src0, Src1);
572a6c8715SSebastian Neubauer }
582a6c8715SSebastian Neubauer 
59b8d19947SSebastian Neubauer // Check if a value can be converted to a 16-bit value without losing
60b8d19947SSebastian Neubauer // precision.
614ed7c6eeSSebastian Neubauer // The value is expected to be either a float (IsFloat = true) or an unsigned
624ed7c6eeSSebastian Neubauer // integer (IsFloat = false).
canSafelyConvertTo16Bit(Value & V,bool IsFloat)634ed7c6eeSSebastian Neubauer static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
64b8d19947SSebastian Neubauer   Type *VTy = V.getType();
65b8d19947SSebastian Neubauer   if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
66b8d19947SSebastian Neubauer     // The value is already 16-bit, so we don't want to convert to 16-bit again!
67b8d19947SSebastian Neubauer     return false;
68b8d19947SSebastian Neubauer   }
694ed7c6eeSSebastian Neubauer   if (IsFloat) {
70b8d19947SSebastian Neubauer     if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
714ed7c6eeSSebastian Neubauer       // We need to check that if we cast the index down to a half, we do not
724ed7c6eeSSebastian Neubauer       // lose precision.
73b8d19947SSebastian Neubauer       APFloat FloatValue(ConstFloat->getValueAPF());
74b8d19947SSebastian Neubauer       bool LosesInfo = true;
754ed7c6eeSSebastian Neubauer       FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
764ed7c6eeSSebastian Neubauer                          &LosesInfo);
77b8d19947SSebastian Neubauer       return !LosesInfo;
78b8d19947SSebastian Neubauer     }
794ed7c6eeSSebastian Neubauer   } else {
804ed7c6eeSSebastian Neubauer     if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
814ed7c6eeSSebastian Neubauer       // We need to check that if we cast the index down to an i16, we do not
824ed7c6eeSSebastian Neubauer       // lose precision.
834ed7c6eeSSebastian Neubauer       APInt IntValue(ConstInt->getValue());
844ed7c6eeSSebastian Neubauer       return IntValue.getActiveBits() <= 16;
854ed7c6eeSSebastian Neubauer     }
864ed7c6eeSSebastian Neubauer   }
874ed7c6eeSSebastian Neubauer 
88b8d19947SSebastian Neubauer   Value *CastSrc;
894ed7c6eeSSebastian Neubauer   bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc)))
904ed7c6eeSSebastian Neubauer                        : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)));
914ed7c6eeSSebastian Neubauer   if (IsExt) {
92b8d19947SSebastian Neubauer     Type *CastSrcTy = CastSrc->getType();
93b8d19947SSebastian Neubauer     if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
94b8d19947SSebastian Neubauer       return true;
95b8d19947SSebastian Neubauer   }
96b8d19947SSebastian Neubauer 
97b8d19947SSebastian Neubauer   return false;
98b8d19947SSebastian Neubauer }
99b8d19947SSebastian Neubauer 
100b8d19947SSebastian Neubauer // Convert a value to 16-bit.
convertTo16Bit(Value & V,InstCombiner::BuilderTy & Builder)10120e9c36cSFangrui Song static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
102b8d19947SSebastian Neubauer   Type *VTy = V.getType();
103b8d19947SSebastian Neubauer   if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
104b8d19947SSebastian Neubauer     return cast<Instruction>(&V)->getOperand(0);
105b8d19947SSebastian Neubauer   if (VTy->isIntegerTy())
106b8d19947SSebastian Neubauer     return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
107b8d19947SSebastian Neubauer   if (VTy->isFloatingPointTy())
108b8d19947SSebastian Neubauer     return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
109b8d19947SSebastian Neubauer 
110b8d19947SSebastian Neubauer   llvm_unreachable("Should never be called!");
111b8d19947SSebastian Neubauer }
112b8d19947SSebastian Neubauer 
1132417de27SMariusz Sikora /// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with
1142417de27SMariusz Sikora /// modified arguments (based on OldIntr) and replaces InstToReplace with
1152417de27SMariusz Sikora /// this newly created intrinsic call.
modifyIntrinsicCall(IntrinsicInst & OldIntr,Instruction & InstToReplace,unsigned NewIntr,InstCombiner & IC,std::function<void (SmallVectorImpl<Value * > &,SmallVectorImpl<Type * > &)> Func)116603d1803SSebastian Neubauer static Optional<Instruction *> modifyIntrinsicCall(
1172417de27SMariusz Sikora     IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
1182417de27SMariusz Sikora     InstCombiner &IC,
119603d1803SSebastian Neubauer     std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
120603d1803SSebastian Neubauer         Func) {
121603d1803SSebastian Neubauer   SmallVector<Type *, 4> ArgTys;
1222417de27SMariusz Sikora   if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
123603d1803SSebastian Neubauer     return None;
124603d1803SSebastian Neubauer 
1252417de27SMariusz Sikora   SmallVector<Value *, 8> Args(OldIntr.args());
126603d1803SSebastian Neubauer 
127603d1803SSebastian Neubauer   // Modify arguments and types
128603d1803SSebastian Neubauer   Func(Args, ArgTys);
129603d1803SSebastian Neubauer 
1302417de27SMariusz Sikora   Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);
131603d1803SSebastian Neubauer 
132603d1803SSebastian Neubauer   CallInst *NewCall = IC.Builder.CreateCall(I, Args);
1332417de27SMariusz Sikora   NewCall->takeName(&OldIntr);
1342417de27SMariusz Sikora   NewCall->copyMetadata(OldIntr);
135603d1803SSebastian Neubauer   if (isa<FPMathOperator>(NewCall))
1362417de27SMariusz Sikora     NewCall->copyFastMathFlags(&OldIntr);
137603d1803SSebastian Neubauer 
138603d1803SSebastian Neubauer   // Erase and replace uses
1392417de27SMariusz Sikora   if (!InstToReplace.getType()->isVoidTy())
1402417de27SMariusz Sikora     IC.replaceInstUsesWith(InstToReplace, NewCall);
1412417de27SMariusz Sikora 
1422417de27SMariusz Sikora   bool RemoveOldIntr = &OldIntr != &InstToReplace;
1432417de27SMariusz Sikora 
1442417de27SMariusz Sikora   auto RetValue = IC.eraseInstFromFunction(InstToReplace);
1452417de27SMariusz Sikora   if (RemoveOldIntr)
1462417de27SMariusz Sikora     IC.eraseInstFromFunction(OldIntr);
1472417de27SMariusz Sikora 
1482417de27SMariusz Sikora   return RetValue;
149603d1803SSebastian Neubauer }
150603d1803SSebastian Neubauer 
151b8d19947SSebastian Neubauer static Optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget * ST,const AMDGPU::ImageDimIntrinsicInfo * ImageDimIntr,IntrinsicInst & II,InstCombiner & IC)152b8d19947SSebastian Neubauer simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
153b8d19947SSebastian Neubauer                              const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
154b8d19947SSebastian Neubauer                              IntrinsicInst &II, InstCombiner &IC) {
155603d1803SSebastian Neubauer   // Optimize _L to _LZ when _L is zero
156603d1803SSebastian Neubauer   if (const auto *LZMappingInfo =
157603d1803SSebastian Neubauer           AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
158603d1803SSebastian Neubauer     if (auto *ConstantLod =
159603d1803SSebastian Neubauer             dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
160603d1803SSebastian Neubauer       if (ConstantLod->isZero() || ConstantLod->isNegative()) {
161603d1803SSebastian Neubauer         const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
162603d1803SSebastian Neubauer             AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
163603d1803SSebastian Neubauer                                                      ImageDimIntr->Dim);
164603d1803SSebastian Neubauer         return modifyIntrinsicCall(
1652417de27SMariusz Sikora             II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
166603d1803SSebastian Neubauer               Args.erase(Args.begin() + ImageDimIntr->LodIndex);
167603d1803SSebastian Neubauer             });
168603d1803SSebastian Neubauer       }
169603d1803SSebastian Neubauer     }
170603d1803SSebastian Neubauer   }
171603d1803SSebastian Neubauer 
172603d1803SSebastian Neubauer   // Optimize _mip away, when 'lod' is zero
173603d1803SSebastian Neubauer   if (const auto *MIPMappingInfo =
174603d1803SSebastian Neubauer           AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
175603d1803SSebastian Neubauer     if (auto *ConstantMip =
176603d1803SSebastian Neubauer             dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
177603d1803SSebastian Neubauer       if (ConstantMip->isZero()) {
178603d1803SSebastian Neubauer         const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
179603d1803SSebastian Neubauer             AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
180603d1803SSebastian Neubauer                                                      ImageDimIntr->Dim);
181603d1803SSebastian Neubauer         return modifyIntrinsicCall(
1822417de27SMariusz Sikora             II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
183603d1803SSebastian Neubauer               Args.erase(Args.begin() + ImageDimIntr->MipIndex);
184603d1803SSebastian Neubauer             });
185603d1803SSebastian Neubauer       }
186603d1803SSebastian Neubauer     }
187603d1803SSebastian Neubauer   }
188603d1803SSebastian Neubauer 
189603d1803SSebastian Neubauer   // Optimize _bias away when 'bias' is zero
190603d1803SSebastian Neubauer   if (const auto *BiasMappingInfo =
191603d1803SSebastian Neubauer           AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
192603d1803SSebastian Neubauer     if (auto *ConstantBias =
193603d1803SSebastian Neubauer             dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
194603d1803SSebastian Neubauer       if (ConstantBias->isZero()) {
195603d1803SSebastian Neubauer         const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
196603d1803SSebastian Neubauer             AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
197603d1803SSebastian Neubauer                                                      ImageDimIntr->Dim);
198603d1803SSebastian Neubauer         return modifyIntrinsicCall(
1992417de27SMariusz Sikora             II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
200603d1803SSebastian Neubauer               Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
201603d1803SSebastian Neubauer               ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
202603d1803SSebastian Neubauer             });
203603d1803SSebastian Neubauer       }
204603d1803SSebastian Neubauer     }
205603d1803SSebastian Neubauer   }
206603d1803SSebastian Neubauer 
20780532ebbSSebastian Neubauer   // Optimize _offset away when 'offset' is zero
20880532ebbSSebastian Neubauer   if (const auto *OffsetMappingInfo =
20980532ebbSSebastian Neubauer           AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
21080532ebbSSebastian Neubauer     if (auto *ConstantOffset =
21180532ebbSSebastian Neubauer             dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
21280532ebbSSebastian Neubauer       if (ConstantOffset->isZero()) {
21380532ebbSSebastian Neubauer         const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
21480532ebbSSebastian Neubauer             AMDGPU::getImageDimIntrinsicByBaseOpcode(
21580532ebbSSebastian Neubauer                 OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
21680532ebbSSebastian Neubauer         return modifyIntrinsicCall(
2172417de27SMariusz Sikora             II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
21880532ebbSSebastian Neubauer               Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
21980532ebbSSebastian Neubauer             });
22080532ebbSSebastian Neubauer       }
22180532ebbSSebastian Neubauer     }
22280532ebbSSebastian Neubauer   }
22380532ebbSSebastian Neubauer 
2242417de27SMariusz Sikora   // Try to use D16
2252417de27SMariusz Sikora   if (ST->hasD16Images()) {
2262417de27SMariusz Sikora 
2272417de27SMariusz Sikora     const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2282417de27SMariusz Sikora         AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
2292417de27SMariusz Sikora 
2302417de27SMariusz Sikora     if (BaseOpcode->HasD16) {
2312417de27SMariusz Sikora 
2322417de27SMariusz Sikora       // If the only use of image intrinsic is a fptrunc (with conversion to
2332417de27SMariusz Sikora       // half) then both fptrunc and image intrinsic will be replaced with image
2342417de27SMariusz Sikora       // intrinsic with D16 flag.
2352417de27SMariusz Sikora       if (II.hasOneUse()) {
2362417de27SMariusz Sikora         Instruction *User = II.user_back();
2372417de27SMariusz Sikora 
2382417de27SMariusz Sikora         if (User->getOpcode() == Instruction::FPTrunc &&
2392417de27SMariusz Sikora             User->getType()->getScalarType()->isHalfTy()) {
2402417de27SMariusz Sikora 
2412417de27SMariusz Sikora           return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
2422417de27SMariusz Sikora                                      [&](auto &Args, auto &ArgTys) {
2432417de27SMariusz Sikora                                        // Change return type of image intrinsic.
2442417de27SMariusz Sikora                                        // Set it to return type of fptrunc.
2452417de27SMariusz Sikora                                        ArgTys[0] = User->getType();
2462417de27SMariusz Sikora                                      });
2472417de27SMariusz Sikora         }
2482417de27SMariusz Sikora       }
2492417de27SMariusz Sikora     }
2502417de27SMariusz Sikora   }
2512417de27SMariusz Sikora 
252603d1803SSebastian Neubauer   // Try to use A16 or G16
253b8d19947SSebastian Neubauer   if (!ST->hasA16() && !ST->hasG16())
254b8d19947SSebastian Neubauer     return None;
255b8d19947SSebastian Neubauer 
2564ed7c6eeSSebastian Neubauer   // Address is interpreted as float if the instruction has a sampler or as
2574ed7c6eeSSebastian Neubauer   // unsigned int if there is no sampler.
2584ed7c6eeSSebastian Neubauer   bool HasSampler =
2594ed7c6eeSSebastian Neubauer       AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
260b8d19947SSebastian Neubauer   bool FloatCoord = false;
261b8d19947SSebastian Neubauer   // true means derivatives can be converted to 16 bit, coordinates not
262b8d19947SSebastian Neubauer   bool OnlyDerivatives = false;
263b8d19947SSebastian Neubauer 
264b8d19947SSebastian Neubauer   for (unsigned OperandIndex = ImageDimIntr->GradientStart;
265b8d19947SSebastian Neubauer        OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
266b8d19947SSebastian Neubauer     Value *Coord = II.getOperand(OperandIndex);
267b8d19947SSebastian Neubauer     // If the values are not derived from 16-bit values, we cannot optimize.
2684ed7c6eeSSebastian Neubauer     if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
269b8d19947SSebastian Neubauer       if (OperandIndex < ImageDimIntr->CoordStart ||
270b8d19947SSebastian Neubauer           ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
271b8d19947SSebastian Neubauer         return None;
272b8d19947SSebastian Neubauer       }
273b8d19947SSebastian Neubauer       // All gradients can be converted, so convert only them
274b8d19947SSebastian Neubauer       OnlyDerivatives = true;
275b8d19947SSebastian Neubauer       break;
276b8d19947SSebastian Neubauer     }
277b8d19947SSebastian Neubauer 
278b8d19947SSebastian Neubauer     assert(OperandIndex == ImageDimIntr->GradientStart ||
279b8d19947SSebastian Neubauer            FloatCoord == Coord->getType()->isFloatingPointTy());
280b8d19947SSebastian Neubauer     FloatCoord = Coord->getType()->isFloatingPointTy();
281b8d19947SSebastian Neubauer   }
282b8d19947SSebastian Neubauer 
2830530fdbbSSebastian Neubauer   if (!OnlyDerivatives && !ST->hasA16())
284b8d19947SSebastian Neubauer     OnlyDerivatives = true; // Only supports G16
2850530fdbbSSebastian Neubauer 
2860530fdbbSSebastian Neubauer   // Check if there is a bias parameter and if it can be converted to f16
2870530fdbbSSebastian Neubauer   if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
2880530fdbbSSebastian Neubauer     Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
2894ed7c6eeSSebastian Neubauer     assert(HasSampler &&
2904ed7c6eeSSebastian Neubauer            "Only image instructions with a sampler can have a bias");
2914ed7c6eeSSebastian Neubauer     if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
2920530fdbbSSebastian Neubauer       OnlyDerivatives = true;
293b8d19947SSebastian Neubauer   }
294b8d19947SSebastian Neubauer 
2950530fdbbSSebastian Neubauer   if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
2960530fdbbSSebastian Neubauer                                                ImageDimIntr->CoordStart))
2970530fdbbSSebastian Neubauer     return None;
2980530fdbbSSebastian Neubauer 
299b8d19947SSebastian Neubauer   Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
300b8d19947SSebastian Neubauer                                : Type::getInt16Ty(II.getContext());
301b8d19947SSebastian Neubauer 
302603d1803SSebastian Neubauer   return modifyIntrinsicCall(
3032417de27SMariusz Sikora       II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
304b8d19947SSebastian Neubauer         ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
3050530fdbbSSebastian Neubauer         if (!OnlyDerivatives) {
306b8d19947SSebastian Neubauer           ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
3070530fdbbSSebastian Neubauer 
3080530fdbbSSebastian Neubauer           // Change the bias type
3090530fdbbSSebastian Neubauer           if (ImageDimIntr->NumBiasArgs != 0)
3100530fdbbSSebastian Neubauer             ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
3110530fdbbSSebastian Neubauer         }
312b8d19947SSebastian Neubauer 
313b8d19947SSebastian Neubauer         unsigned EndIndex =
314b8d19947SSebastian Neubauer             OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
315b8d19947SSebastian Neubauer         for (unsigned OperandIndex = ImageDimIntr->GradientStart;
316b8d19947SSebastian Neubauer              OperandIndex < EndIndex; OperandIndex++) {
317b8d19947SSebastian Neubauer           Args[OperandIndex] =
318b8d19947SSebastian Neubauer               convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
319b8d19947SSebastian Neubauer         }
320b8d19947SSebastian Neubauer 
3210530fdbbSSebastian Neubauer         // Convert the bias
3220530fdbbSSebastian Neubauer         if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
3230530fdbbSSebastian Neubauer           Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
3240530fdbbSSebastian Neubauer           Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
3250530fdbbSSebastian Neubauer         }
326603d1803SSebastian Neubauer       });
327b8d19947SSebastian Neubauer }
328b8d19947SSebastian Neubauer 
canSimplifyLegacyMulToMul(const Value * Op0,const Value * Op1,InstCombiner & IC) const329958130dfSJay Foad bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
330958130dfSJay Foad                                            InstCombiner &IC) const {
331958130dfSJay Foad   // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
332958130dfSJay Foad   // infinity, gives +0.0. If we can prove we don't have one of the special
333958130dfSJay Foad   // cases then we can use a normal multiply instead.
334958130dfSJay Foad   // TODO: Create and use isKnownFiniteNonZero instead of just matching
335958130dfSJay Foad   // constants here.
336958130dfSJay Foad   if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
337958130dfSJay Foad       match(Op1, PatternMatch::m_FiniteNonZero())) {
338958130dfSJay Foad     // One operand is not zero or infinity or NaN.
339958130dfSJay Foad     return true;
340958130dfSJay Foad   }
341958130dfSJay Foad   auto *TLI = &IC.getTargetLibraryInfo();
342958130dfSJay Foad   if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
343958130dfSJay Foad       isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
344958130dfSJay Foad     // Neither operand is infinity or NaN.
345958130dfSJay Foad     return true;
346958130dfSJay Foad   }
347958130dfSJay Foad   return false;
348958130dfSJay Foad }
349958130dfSJay Foad 
3502a6c8715SSebastian Neubauer Optional<Instruction *>
instCombineIntrinsic(InstCombiner & IC,IntrinsicInst & II) const3512a6c8715SSebastian Neubauer GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
3522a6c8715SSebastian Neubauer   Intrinsic::ID IID = II.getIntrinsicID();
3532a6c8715SSebastian Neubauer   switch (IID) {
3542a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_rcp: {
3552a6c8715SSebastian Neubauer     Value *Src = II.getArgOperand(0);
3562a6c8715SSebastian Neubauer 
3572a6c8715SSebastian Neubauer     // TODO: Move to ConstantFolding/InstSimplify?
3582a6c8715SSebastian Neubauer     if (isa<UndefValue>(Src)) {
3592a6c8715SSebastian Neubauer       Type *Ty = II.getType();
3602a6c8715SSebastian Neubauer       auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
3612a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, QNaN);
3622a6c8715SSebastian Neubauer     }
3632a6c8715SSebastian Neubauer 
3642a6c8715SSebastian Neubauer     if (II.isStrictFP())
3652a6c8715SSebastian Neubauer       break;
3662a6c8715SSebastian Neubauer 
3672a6c8715SSebastian Neubauer     if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3682a6c8715SSebastian Neubauer       const APFloat &ArgVal = C->getValueAPF();
3692a6c8715SSebastian Neubauer       APFloat Val(ArgVal.getSemantics(), 1);
3702a6c8715SSebastian Neubauer       Val.divide(ArgVal, APFloat::rmNearestTiesToEven);
3712a6c8715SSebastian Neubauer 
3722a6c8715SSebastian Neubauer       // This is more precise than the instruction may give.
3732a6c8715SSebastian Neubauer       //
3742a6c8715SSebastian Neubauer       // TODO: The instruction always flushes denormal results (except for f16),
3752a6c8715SSebastian Neubauer       // should this also?
3762a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
3772a6c8715SSebastian Neubauer     }
3782a6c8715SSebastian Neubauer 
3792a6c8715SSebastian Neubauer     break;
3802a6c8715SSebastian Neubauer   }
3812a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_rsq: {
3822a6c8715SSebastian Neubauer     Value *Src = II.getArgOperand(0);
3832a6c8715SSebastian Neubauer 
3842a6c8715SSebastian Neubauer     // TODO: Move to ConstantFolding/InstSimplify?
3852a6c8715SSebastian Neubauer     if (isa<UndefValue>(Src)) {
3862a6c8715SSebastian Neubauer       Type *Ty = II.getType();
3872a6c8715SSebastian Neubauer       auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
3882a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, QNaN);
3892a6c8715SSebastian Neubauer     }
3902a6c8715SSebastian Neubauer 
3912a6c8715SSebastian Neubauer     break;
3922a6c8715SSebastian Neubauer   }
3932a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_frexp_mant:
3942a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_frexp_exp: {
3952a6c8715SSebastian Neubauer     Value *Src = II.getArgOperand(0);
3962a6c8715SSebastian Neubauer     if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3972a6c8715SSebastian Neubauer       int Exp;
3982a6c8715SSebastian Neubauer       APFloat Significand =
3992a6c8715SSebastian Neubauer           frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);
4002a6c8715SSebastian Neubauer 
4012a6c8715SSebastian Neubauer       if (IID == Intrinsic::amdgcn_frexp_mant) {
4022a6c8715SSebastian Neubauer         return IC.replaceInstUsesWith(
4032a6c8715SSebastian Neubauer             II, ConstantFP::get(II.getContext(), Significand));
4042a6c8715SSebastian Neubauer       }
4052a6c8715SSebastian Neubauer 
4062a6c8715SSebastian Neubauer       // Match instruction special case behavior.
4072a6c8715SSebastian Neubauer       if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
4082a6c8715SSebastian Neubauer         Exp = 0;
4092a6c8715SSebastian Neubauer 
4102a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
4112a6c8715SSebastian Neubauer     }
4122a6c8715SSebastian Neubauer 
4132a6c8715SSebastian Neubauer     if (isa<UndefValue>(Src)) {
4142a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
4152a6c8715SSebastian Neubauer     }
4162a6c8715SSebastian Neubauer 
4172a6c8715SSebastian Neubauer     break;
4182a6c8715SSebastian Neubauer   }
4192a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_class: {
4202a6c8715SSebastian Neubauer     enum {
4212a6c8715SSebastian Neubauer       S_NAN = 1 << 0,       // Signaling NaN
4222a6c8715SSebastian Neubauer       Q_NAN = 1 << 1,       // Quiet NaN
4232a6c8715SSebastian Neubauer       N_INFINITY = 1 << 2,  // Negative infinity
4242a6c8715SSebastian Neubauer       N_NORMAL = 1 << 3,    // Negative normal
4252a6c8715SSebastian Neubauer       N_SUBNORMAL = 1 << 4, // Negative subnormal
4262a6c8715SSebastian Neubauer       N_ZERO = 1 << 5,      // Negative zero
4272a6c8715SSebastian Neubauer       P_ZERO = 1 << 6,      // Positive zero
4282a6c8715SSebastian Neubauer       P_SUBNORMAL = 1 << 7, // Positive subnormal
4292a6c8715SSebastian Neubauer       P_NORMAL = 1 << 8,    // Positive normal
4302a6c8715SSebastian Neubauer       P_INFINITY = 1 << 9   // Positive infinity
4312a6c8715SSebastian Neubauer     };
4322a6c8715SSebastian Neubauer 
4332a6c8715SSebastian Neubauer     const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
4342a6c8715SSebastian Neubauer                               N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL |
4352a6c8715SSebastian Neubauer                               P_NORMAL | P_INFINITY;
4362a6c8715SSebastian Neubauer 
4372a6c8715SSebastian Neubauer     Value *Src0 = II.getArgOperand(0);
4382a6c8715SSebastian Neubauer     Value *Src1 = II.getArgOperand(1);
4392a6c8715SSebastian Neubauer     const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
4402a6c8715SSebastian Neubauer     if (!CMask) {
4412a6c8715SSebastian Neubauer       if (isa<UndefValue>(Src0)) {
4422a6c8715SSebastian Neubauer         return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
4432a6c8715SSebastian Neubauer       }
4442a6c8715SSebastian Neubauer 
4452a6c8715SSebastian Neubauer       if (isa<UndefValue>(Src1)) {
4462a6c8715SSebastian Neubauer         return IC.replaceInstUsesWith(II,
4472a6c8715SSebastian Neubauer                                       ConstantInt::get(II.getType(), false));
4482a6c8715SSebastian Neubauer       }
4492a6c8715SSebastian Neubauer       break;
4502a6c8715SSebastian Neubauer     }
4512a6c8715SSebastian Neubauer 
4522a6c8715SSebastian Neubauer     uint32_t Mask = CMask->getZExtValue();
4532a6c8715SSebastian Neubauer 
4542a6c8715SSebastian Neubauer     // If all tests are made, it doesn't matter what the value is.
4552a6c8715SSebastian Neubauer     if ((Mask & FullMask) == FullMask) {
4562a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
4572a6c8715SSebastian Neubauer     }
4582a6c8715SSebastian Neubauer 
4592a6c8715SSebastian Neubauer     if ((Mask & FullMask) == 0) {
4602a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
4612a6c8715SSebastian Neubauer     }
4622a6c8715SSebastian Neubauer 
4632a6c8715SSebastian Neubauer     if (Mask == (S_NAN | Q_NAN)) {
4642a6c8715SSebastian Neubauer       // Equivalent of isnan. Replace with standard fcmp.
4652a6c8715SSebastian Neubauer       Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
4662a6c8715SSebastian Neubauer       FCmp->takeName(&II);
4672a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, FCmp);
4682a6c8715SSebastian Neubauer     }
4692a6c8715SSebastian Neubauer 
4702a6c8715SSebastian Neubauer     if (Mask == (N_ZERO | P_ZERO)) {
4712a6c8715SSebastian Neubauer       // Equivalent of == 0.
4722a6c8715SSebastian Neubauer       Value *FCmp =
4732a6c8715SSebastian Neubauer           IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));
4742a6c8715SSebastian Neubauer 
4752a6c8715SSebastian Neubauer       FCmp->takeName(&II);
4762a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, FCmp);
4772a6c8715SSebastian Neubauer     }
4782a6c8715SSebastian Neubauer 
4792a6c8715SSebastian Neubauer     // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
4802a6c8715SSebastian Neubauer     if (((Mask & S_NAN) || (Mask & Q_NAN)) &&
4812a6c8715SSebastian Neubauer         isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) {
4822a6c8715SSebastian Neubauer       return IC.replaceOperand(
4832a6c8715SSebastian Neubauer           II, 1, ConstantInt::get(Src1->getType(), Mask & ~(S_NAN | Q_NAN)));
4842a6c8715SSebastian Neubauer     }
4852a6c8715SSebastian Neubauer 
4862a6c8715SSebastian Neubauer     const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
4872a6c8715SSebastian Neubauer     if (!CVal) {
4882a6c8715SSebastian Neubauer       if (isa<UndefValue>(Src0)) {
4892a6c8715SSebastian Neubauer         return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
4902a6c8715SSebastian Neubauer       }
4912a6c8715SSebastian Neubauer 
4922a6c8715SSebastian Neubauer       // Clamp mask to used bits
4932a6c8715SSebastian Neubauer       if ((Mask & FullMask) != Mask) {
4942a6c8715SSebastian Neubauer         CallInst *NewCall = IC.Builder.CreateCall(
4952a6c8715SSebastian Neubauer             II.getCalledFunction(),
4962a6c8715SSebastian Neubauer             {Src0, ConstantInt::get(Src1->getType(), Mask & FullMask)});
4972a6c8715SSebastian Neubauer 
4982a6c8715SSebastian Neubauer         NewCall->takeName(&II);
4992a6c8715SSebastian Neubauer         return IC.replaceInstUsesWith(II, NewCall);
5002a6c8715SSebastian Neubauer       }
5012a6c8715SSebastian Neubauer 
5022a6c8715SSebastian Neubauer       break;
5032a6c8715SSebastian Neubauer     }
5042a6c8715SSebastian Neubauer 
5052a6c8715SSebastian Neubauer     const APFloat &Val = CVal->getValueAPF();
5062a6c8715SSebastian Neubauer 
5072a6c8715SSebastian Neubauer     bool Result =
5082a6c8715SSebastian Neubauer         ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
5092a6c8715SSebastian Neubauer         ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
5102a6c8715SSebastian Neubauer         ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
5112a6c8715SSebastian Neubauer         ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
5122a6c8715SSebastian Neubauer         ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
5132a6c8715SSebastian Neubauer         ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
5142a6c8715SSebastian Neubauer         ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
5152a6c8715SSebastian Neubauer         ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
5162a6c8715SSebastian Neubauer         ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
5172a6c8715SSebastian Neubauer         ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
5182a6c8715SSebastian Neubauer 
5192a6c8715SSebastian Neubauer     return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
5202a6c8715SSebastian Neubauer   }
5212a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_cvt_pkrtz: {
    // Constant-fold llvm.amdgcn.cvt.pkrtz.
    //  * Both operands are ConstantFP: convert each one to the scalar
    //    floating-point type of the result using round-toward-zero and replace
    //    the call with the two-element constant vector.
    //  * Both operands are undef: the result is undef.
    // Any other combination is left untouched.
5222a6c8715SSebastian Neubauer     Value *Src0 = II.getArgOperand(0);
5232a6c8715SSebastian Neubauer     Value *Src1 = II.getArgOperand(1);
5242a6c8715SSebastian Neubauer     if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
5252a6c8715SSebastian Neubauer       if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
        // Semantics of the result's element type (named "Half" here; the type
        // itself is taken from the call's return type, not hard-coded).
5262a6c8715SSebastian Neubauer         const fltSemantics &HalfSem =
5272a6c8715SSebastian Neubauer             II.getType()->getScalarType()->getFltSemantics();
        // Out-parameter required by APFloat::convert; its value is not
        // inspected, so an inexact conversion is still folded.
5282a6c8715SSebastian Neubauer         bool LosesInfo;
        // Work on copies: convert() mutates the APFloat in place.
5292a6c8715SSebastian Neubauer         APFloat Val0 = C0->getValueAPF();
5302a6c8715SSebastian Neubauer         APFloat Val1 = C1->getValueAPF();
5312a6c8715SSebastian Neubauer         Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
5322a6c8715SSebastian Neubauer         Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
5332a6c8715SSebastian Neubauer 
5342a6c8715SSebastian Neubauer         Constant *Folded =
5352a6c8715SSebastian Neubauer             ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
5362a6c8715SSebastian Neubauer                                  ConstantFP::get(II.getContext(), Val1)});
5372a6c8715SSebastian Neubauer         return IC.replaceInstUsesWith(II, Folded);
5382a6c8715SSebastian Neubauer       }
5392a6c8715SSebastian Neubauer     }
5402a6c8715SSebastian Neubauer 
    // Only fold to undef when *both* inputs are undef; a single undef operand
    // is not folded here.
5412a6c8715SSebastian Neubauer     if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
5422a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
5432a6c8715SSebastian Neubauer     }
5442a6c8715SSebastian Neubauer 
5452a6c8715SSebastian Neubauer     break;
5462a6c8715SSebastian Neubauer   }
5472a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_cvt_pknorm_i16:
5482a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_cvt_pknorm_u16:
5492a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_cvt_pk_i16:
5502a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_cvt_pk_u16: {
    // The four packed-convert intrinsics share a single fold: if both source
    // operands are undef, the whole result is undef. No constant folding of
    // defined inputs is attempted in this case block.
5512a6c8715SSebastian Neubauer     Value *Src0 = II.getArgOperand(0);
5522a6c8715SSebastian Neubauer     Value *Src1 = II.getArgOperand(1);
5532a6c8715SSebastian Neubauer 
5542a6c8715SSebastian Neubauer     if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
5552a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
5562a6c8715SSebastian Neubauer     }
5572a6c8715SSebastian Neubauer 
5582a6c8715SSebastian Neubauer     break;
5592a6c8715SSebastian Neubauer   }
5602a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_ubfe:
5612a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_sbfe: {
    // Bitfield extract: operand 0 is the source, operand 1 the bit offset and
    // operand 2 the field width. The folds below run in this order:
    //   1. undef source            -> undef
    //   2. constant width that is 0 modulo the integer size -> 0
    //   3. constant width/offset >= integer size -> mask the operand in place
    //   4. both width and offset constant -> plain shl/lshr/ashr sequence
5622a6c8715SSebastian Neubauer     // Decompose simple cases into standard shifts.
5632a6c8715SSebastian Neubauer     Value *Src = II.getArgOperand(0);
5642a6c8715SSebastian Neubauer     if (isa<UndefValue>(Src)) {
5652a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, Src);
5662a6c8715SSebastian Neubauer     }
5672a6c8715SSebastian Neubauer 
    // Width is only assigned when operand 2 is a ConstantInt; every later read
    // is guarded by the `!CWidth || !COffset` check further down.
5682a6c8715SSebastian Neubauer     unsigned Width;
5692a6c8715SSebastian Neubauer     Type *Ty = II.getType();
5702a6c8715SSebastian Neubauer     unsigned IntSize = Ty->getIntegerBitWidth();
5712a6c8715SSebastian Neubauer 
5722a6c8715SSebastian Neubauer     ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
5732a6c8715SSebastian Neubauer     if (CWidth) {
5742a6c8715SSebastian Neubauer       Width = CWidth->getZExtValue();
      // `IntSize - 1` is used as a bit mask, which presumes IntSize is a
      // power of two. A width whose masked value is zero extracts nothing.
5752a6c8715SSebastian Neubauer       if ((Width & (IntSize - 1)) == 0) {
5762a6c8715SSebastian Neubauer         return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
5772a6c8715SSebastian Neubauer       }
5782a6c8715SSebastian Neubauer 
5792a6c8715SSebastian Neubauer       // Hardware ignores high bits, so remove those.
      // Returns after rewriting the operand; the remaining folds are applied
      // when this instruction is visited again.
5802a6c8715SSebastian Neubauer       if (Width >= IntSize) {
5812a6c8715SSebastian Neubauer         return IC.replaceOperand(
5822a6c8715SSebastian Neubauer             II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
5832a6c8715SSebastian Neubauer       }
5842a6c8715SSebastian Neubauer     }
5852a6c8715SSebastian Neubauer 
    // Same treatment for the offset operand: only assigned when constant, and
    // reduced modulo IntSize when it is out of range.
5862a6c8715SSebastian Neubauer     unsigned Offset;
5872a6c8715SSebastian Neubauer     ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
5882a6c8715SSebastian Neubauer     if (COffset) {
5892a6c8715SSebastian Neubauer       Offset = COffset->getZExtValue();
5902a6c8715SSebastian Neubauer       if (Offset >= IntSize) {
5912a6c8715SSebastian Neubauer         return IC.replaceOperand(
5922a6c8715SSebastian Neubauer             II, 1,
5932a6c8715SSebastian Neubauer             ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
5942a6c8715SSebastian Neubauer       }
5952a6c8715SSebastian Neubauer     }
5962a6c8715SSebastian Neubauer 
    // sbfe sign-extends the extracted field (ashr below); ubfe zero-extends
    // it (lshr below).
5972a6c8715SSebastian Neubauer     bool Signed = IID == Intrinsic::amdgcn_sbfe;
5982a6c8715SSebastian Neubauer 
    // The shift decomposition needs both the width and the offset as
    // compile-time constants.
5992a6c8715SSebastian Neubauer     if (!CWidth || !COffset)
6002a6c8715SSebastian Neubauer       break;
6012a6c8715SSebastian Neubauer 
602dc6e8dfdSJacob Lambert     // The case of Width == 0 is handled above, which makes this transformation
6032a6c8715SSebastian Neubauer     // safe.  If Width == 0, then the ashr and lshr instructions become poison
6042a6c8715SSebastian Neubauer     // value since the shift amount would be equal to the bit size.
6052a6c8715SSebastian Neubauer     assert(Width != 0);
6062a6c8715SSebastian Neubauer 
6072a6c8715SSebastian Neubauer     // TODO: This allows folding to undef when the hardware has specific
6082a6c8715SSebastian Neubauer     // behavior?
    // Field lies strictly below the top bit: shift left so the field's most
    // significant bit becomes the value's top bit, then shift right by
    // (IntSize - Width) to move the field down to bit 0 with the requested
    // extension.
6092a6c8715SSebastian Neubauer     if (Offset + Width < IntSize) {
6102a6c8715SSebastian Neubauer       Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
6112a6c8715SSebastian Neubauer       Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
6122a6c8715SSebastian Neubauer                                  : IC.Builder.CreateLShr(Shl, IntSize - Width);
6132a6c8715SSebastian Neubauer       RightShift->takeName(&II);
6142a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, RightShift);
6152a6c8715SSebastian Neubauer     }
6162a6c8715SSebastian Neubauer 
    // Otherwise the field reaches (or would run past) the top bit, so a
    // single right shift by Offset is emitted.
6172a6c8715SSebastian Neubauer     Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
6182a6c8715SSebastian Neubauer                                : IC.Builder.CreateLShr(Src, Offset);
6192a6c8715SSebastian Neubauer 
6202a6c8715SSebastian Neubauer     RightShift->takeName(&II);
6212a6c8715SSebastian Neubauer     return IC.replaceInstUsesWith(II, RightShift);
6222a6c8715SSebastian Neubauer   }
6232a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_exp:
624*445a483bSJay Foad   case Intrinsic::amdgcn_exp_row:
6252a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_exp_compr: {
    // Export intrinsics: operand 1 is a constant enable mask and the exported
    // sources start at operand 2. Any source whose enable bits are all clear
    // is replaced with undef so the value feeding it can become dead.
6262a6c8715SSebastian Neubauer     ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
6272a6c8715SSebastian Neubauer     unsigned EnBits = En->getZExtValue();
6282a6c8715SSebastian Neubauer     if (EnBits == 0xf)
6292a6c8715SSebastian Neubauer       break; // All inputs enabled.
6302a6c8715SSebastian Neubauer 
    // Non-compressed form (exp, exp_row): 4 sources, source I is controlled by
    // mask bit I.
    // Compressed form (exp_compr): 2 sources, source I is controlled by the
    // bit pair (2*I, 2*I+1) and counts as disabled only when both are clear.
6312a6c8715SSebastian Neubauer     bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
6322a6c8715SSebastian Neubauer     bool Changed = false;
6332a6c8715SSebastian Neubauer     for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
6342a6c8715SSebastian Neubauer       if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
6352a6c8715SSebastian Neubauer           (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
6362a6c8715SSebastian Neubauer         Value *Src = II.getArgOperand(I + 2);
        // Skip operands that are already undef so Changed stays false when
        // nothing was actually rewritten.
6372a6c8715SSebastian Neubauer         if (!isa<UndefValue>(Src)) {
6382a6c8715SSebastian Neubauer           IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
6392a6c8715SSebastian Neubauer           Changed = true;
6402a6c8715SSebastian Neubauer         }
6412a6c8715SSebastian Neubauer       }
6422a6c8715SSebastian Neubauer     }
6432a6c8715SSebastian Neubauer 
    // Returning the (modified) instruction itself reports that it was changed
    // in place.
6442a6c8715SSebastian Neubauer     if (Changed) {
6452a6c8715SSebastian Neubauer       return &II;
6462a6c8715SSebastian Neubauer     }
6472a6c8715SSebastian Neubauer 
6482a6c8715SSebastian Neubauer     break;
6492a6c8715SSebastian Neubauer   }
6502a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_fmed3: {
    // Median-of-three folds, tried in this order:
    //   1. one operand is NaN/undef -> minnum/maxnum of the other two
    //   2. constants are moved towards the right-hand operands
    //   3. all three operands constant -> constant result via fmed3AMDGCN
6512a6c8715SSebastian Neubauer     // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
6522a6c8715SSebastian Neubauer     // for the shader.
6532a6c8715SSebastian Neubauer 
6542a6c8715SSebastian Neubauer     Value *Src0 = II.getArgOperand(0);
6552a6c8715SSebastian Neubauer     Value *Src1 = II.getArgOperand(1);
6562a6c8715SSebastian Neubauer     Value *Src2 = II.getArgOperand(2);
6572a6c8715SSebastian Neubauer 
6582a6c8715SSebastian Neubauer     // Checking for NaN before canonicalization provides better fidelity when
6592a6c8715SSebastian Neubauer     // mapping other operations onto fmed3 since the order of operands is
6602a6c8715SSebastian Neubauer     // unchanged.
    // The replacement depends on *which* operand is NaN/undef: operand 0 or 1
    // yields minnum of the remaining pair, operand 2 yields maxnum. Only the
    // first matching operand is considered.
6612a6c8715SSebastian Neubauer     CallInst *NewCall = nullptr;
6622a6c8715SSebastian Neubauer     if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
6632a6c8715SSebastian Neubauer       NewCall = IC.Builder.CreateMinNum(Src1, Src2);
6642a6c8715SSebastian Neubauer     } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
6652a6c8715SSebastian Neubauer       NewCall = IC.Builder.CreateMinNum(Src0, Src2);
6662a6c8715SSebastian Neubauer     } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
6672a6c8715SSebastian Neubauer       NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
6682a6c8715SSebastian Neubauer     }
6692a6c8715SSebastian Neubauer 
    // Carry the original call's fast-math flags and name over to the
    // replacement.
6702a6c8715SSebastian Neubauer     if (NewCall) {
6712a6c8715SSebastian Neubauer       NewCall->copyFastMathFlags(&II);
6722a6c8715SSebastian Neubauer       NewCall->takeName(&II);
6732a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, NewCall);
6742a6c8715SSebastian Neubauer     }
6752a6c8715SSebastian Neubauer 
6762a6c8715SSebastian Neubauer     bool Swap = false;
6772a6c8715SSebastian Neubauer     // Canonicalize constants to RHS operands.
6782a6c8715SSebastian Neubauer     //
6792a6c8715SSebastian Neubauer     // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
    // Three conditional adjacent swaps (0<->1, 1<->2, 0<->1) are enough to
    // move every non-constant operand in front of every constant one.
6802a6c8715SSebastian Neubauer     if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
6812a6c8715SSebastian Neubauer       std::swap(Src0, Src1);
6822a6c8715SSebastian Neubauer       Swap = true;
6832a6c8715SSebastian Neubauer     }
6842a6c8715SSebastian Neubauer 
6852a6c8715SSebastian Neubauer     if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
6862a6c8715SSebastian Neubauer       std::swap(Src1, Src2);
6872a6c8715SSebastian Neubauer       Swap = true;
6882a6c8715SSebastian Neubauer     }
6892a6c8715SSebastian Neubauer 
6902a6c8715SSebastian Neubauer     if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
6912a6c8715SSebastian Neubauer       std::swap(Src0, Src1);
6922a6c8715SSebastian Neubauer       Swap = true;
6932a6c8715SSebastian Neubauer     }
6942a6c8715SSebastian Neubauer 
    // Write the reordered operands back and report the in-place change; the
    // constant fold below is reached on a later visit.
6952a6c8715SSebastian Neubauer     if (Swap) {
6962a6c8715SSebastian Neubauer       II.setArgOperand(0, Src0);
6972a6c8715SSebastian Neubauer       II.setArgOperand(1, Src1);
6982a6c8715SSebastian Neubauer       II.setArgOperand(2, Src2);
6992a6c8715SSebastian Neubauer       return &II;
7002a6c8715SSebastian Neubauer     }
7012a6c8715SSebastian Neubauer 
    // All three operands are floating-point constants: evaluate the median at
    // compile time.
7022a6c8715SSebastian Neubauer     if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
7032a6c8715SSebastian Neubauer       if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
7042a6c8715SSebastian Neubauer         if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
7052a6c8715SSebastian Neubauer           APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
7062a6c8715SSebastian Neubauer                                        C2->getValueAPF());
7072a6c8715SSebastian Neubauer           return IC.replaceInstUsesWith(
7082a6c8715SSebastian Neubauer               II, ConstantFP::get(IC.Builder.getContext(), Result));
7092a6c8715SSebastian Neubauer         }
7102a6c8715SSebastian Neubauer       }
7112a6c8715SSebastian Neubauer     }
7122a6c8715SSebastian Neubauer 
7132a6c8715SSebastian Neubauer     break;
7142a6c8715SSebastian Neubauer   }
7152a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_icmp:
7162a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_fcmp: {
    // Wave-wide compare intrinsics: operands 0 and 1 are the values being
    // compared and operand 2 is the predicate, encoded as a constant integer
    // holding a CmpInst predicate value. Folds, in order:
    //   1. both operands constant -> 0 or a read of the exec mask
    //   2. constant on the left   -> swap operands and predicate
    //   3. icmp eq with 1 / -1 of an extended i1 -> icmp ne with 0
    //   4. icmp eq/ne with 0 of an extended compare -> that compare directly
7172a6c8715SSebastian Neubauer     const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
7182a6c8715SSebastian Neubauer     // Guard against invalid arguments.
7192a6c8715SSebastian Neubauer     int64_t CCVal = CC->getZExtValue();
7202a6c8715SSebastian Neubauer     bool IsInteger = IID == Intrinsic::amdgcn_icmp;
    // Leave the call untouched when the predicate code is outside the range
    // that is valid for this flavor of the intrinsic.
7212a6c8715SSebastian Neubauer     if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
7222a6c8715SSebastian Neubauer                        CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
7232a6c8715SSebastian Neubauer         (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
7242a6c8715SSebastian Neubauer                         CCVal > CmpInst::LAST_FCMP_PREDICATE)))
7252a6c8715SSebastian Neubauer       break;
7262a6c8715SSebastian Neubauer 
7272a6c8715SSebastian Neubauer     Value *Src0 = II.getArgOperand(0);
7282a6c8715SSebastian Neubauer     Value *Src1 = II.getArgOperand(1);
7292a6c8715SSebastian Neubauer 
7302a6c8715SSebastian Neubauer     if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
7312a6c8715SSebastian Neubauer       if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
        // Both sides constant: evaluate the comparison. A false result means
        // no lane can set its bit, so the whole mask is zero (the sext of a
        // null i1 is the null value of the result type).
7322a6c8715SSebastian Neubauer         Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
7332a6c8715SSebastian Neubauer         if (CCmp->isNullValue()) {
7342a6c8715SSebastian Neubauer           return IC.replaceInstUsesWith(
7352a6c8715SSebastian Neubauer               II, ConstantExpr::getSExt(CCmp, II.getType()));
7362a6c8715SSebastian Neubauer         }
7372a6c8715SSebastian Neubauer 
        // NOTE(review): every CCmp that is not a null value reaches this
        // point, which would include a compare that did not fold to a literal
        // true/false -- confirm that treating it as "always true" is intended.
7382a6c8715SSebastian Neubauer         // The result of V_ICMP/V_FCMP assembly instructions (which this
7392a6c8715SSebastian Neubauer         // intrinsic exposes) is one bit per thread, masked with the EXEC
7402a6c8715SSebastian Neubauer         // register (which contains the bitmask of live threads). So a
7412a6c8715SSebastian Neubauer         // comparison that always returns true is the same as a read of the
7422a6c8715SSebastian Neubauer         // EXEC register.
7432a6c8715SSebastian Neubauer         Function *NewF = Intrinsic::getDeclaration(
7442a6c8715SSebastian Neubauer             II.getModule(), Intrinsic::read_register, II.getType());
        // llvm.read_register takes the register name as a metadata string.
7452a6c8715SSebastian Neubauer         Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
7462a6c8715SSebastian Neubauer         MDNode *MD = MDNode::get(II.getContext(), MDArgs);
7472a6c8715SSebastian Neubauer         Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
7482a6c8715SSebastian Neubauer         CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
        // Mark the read as convergent at the call site.
7493f4d00bcSArthur Eubanks         NewCall->addFnAttr(Attribute::Convergent);
7502a6c8715SSebastian Neubauer         NewCall->takeName(&II);
7512a6c8715SSebastian Neubauer         return IC.replaceInstUsesWith(II, NewCall);
7522a6c8715SSebastian Neubauer       }
7532a6c8715SSebastian Neubauer 
7542a6c8715SSebastian Neubauer       // Canonicalize constants to RHS.
      // Swapping the operands requires the swapped predicate (for example
      // "less than" becomes "greater than") to keep the comparison equivalent.
7552a6c8715SSebastian Neubauer       CmpInst::Predicate SwapPred =
7562a6c8715SSebastian Neubauer           CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
7572a6c8715SSebastian Neubauer       II.setArgOperand(0, Src1);
7582a6c8715SSebastian Neubauer       II.setArgOperand(1, Src0);
7592a6c8715SSebastian Neubauer       II.setArgOperand(
7602a6c8715SSebastian Neubauer           2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
7612a6c8715SSebastian Neubauer       return &II;
7622a6c8715SSebastian Neubauer     }
7632a6c8715SSebastian Neubauer 
    // Everything below only applies to integer equality / inequality
    // predicates.
7642a6c8715SSebastian Neubauer     if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
7652a6c8715SSebastian Neubauer       break;
7662a6c8715SSebastian Neubauer 
7672a6c8715SSebastian Neubauer     // Canonicalize compare eq with true value to compare != 0
7682a6c8715SSebastian Neubauer     // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
7692a6c8715SSebastian Neubauer     //   -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
7702a6c8715SSebastian Neubauer     // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
7712a6c8715SSebastian Neubauer     //   -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
    // ExtSrc is bound by whichever m_ZExt / m_SExt pattern matches; the final
    // isIntegerTy(1) test restricts the rewrite to extensions of an i1.
7722a6c8715SSebastian Neubauer     Value *ExtSrc;
7732a6c8715SSebastian Neubauer     if (CCVal == CmpInst::ICMP_EQ &&
7742a6c8715SSebastian Neubauer         ((match(Src1, PatternMatch::m_One()) &&
7752a6c8715SSebastian Neubauer           match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
7762a6c8715SSebastian Neubauer          (match(Src1, PatternMatch::m_AllOnes()) &&
7772a6c8715SSebastian Neubauer           match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
7782a6c8715SSebastian Neubauer         ExtSrc->getType()->isIntegerTy(1)) {
7792a6c8715SSebastian Neubauer       IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType()));
7802a6c8715SSebastian Neubauer       IC.replaceOperand(II, 2,
7812a6c8715SSebastian Neubauer                         ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
7822a6c8715SSebastian Neubauer       return &II;
7832a6c8715SSebastian Neubauer     }
7842a6c8715SSebastian Neubauer 
    // Outputs of the m_Cmp pattern match below.
7852a6c8715SSebastian Neubauer     CmpInst::Predicate SrcPred;
7862a6c8715SSebastian Neubauer     Value *SrcLHS;
7872a6c8715SSebastian Neubauer     Value *SrcRHS;
7882a6c8715SSebastian Neubauer 
7892a6c8715SSebastian Neubauer     // Fold compare eq/ne with 0 from a compare result as the predicate to the
7902a6c8715SSebastian Neubauer     // intrinsic. The typical use is a wave vote function in the library, which
7912a6c8715SSebastian Neubauer     // will be fed from a user code condition compared with 0. Fold in the
7922a6c8715SSebastian Neubauer     // redundant compare.
7932a6c8715SSebastian Neubauer 
7942a6c8715SSebastian Neubauer     // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
7952a6c8715SSebastian Neubauer     //   -> llvm.amdgcn.[if]cmp(a, b, pred)
7962a6c8715SSebastian Neubauer     //
7972a6c8715SSebastian Neubauer     // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
7982a6c8715SSebastian Neubauer     //   -> llvm.amdgcn.[if]cmp(a, b, inv pred)
7992a6c8715SSebastian Neubauer     if (match(Src1, PatternMatch::m_Zero()) &&
8002a6c8715SSebastian Neubauer         match(Src0, PatternMatch::m_ZExtOrSExt(
8012a6c8715SSebastian Neubauer                         m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
8022a6c8715SSebastian Neubauer                               PatternMatch::m_Value(SrcRHS))))) {
      // "(cmp) == 0" is true exactly when the inner compare is false, so the
      // inner predicate is inverted for the eq form.
8032a6c8715SSebastian Neubauer       if (CCVal == CmpInst::ICMP_EQ)
8042a6c8715SSebastian Neubauer         SrcPred = CmpInst::getInversePredicate(SrcPred);
8052a6c8715SSebastian Neubauer 
      // The inner compare decides which intrinsic is emitted, independent of
      // the intrinsic currently being visited.
8062a6c8715SSebastian Neubauer       Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
8072a6c8715SSebastian Neubauer                                  ? Intrinsic::amdgcn_fcmp
8082a6c8715SSebastian Neubauer                                  : Intrinsic::amdgcn_icmp;
8092a6c8715SSebastian Neubauer 
8102a6c8715SSebastian Neubauer       Type *Ty = SrcLHS->getType();
8112a6c8715SSebastian Neubauer       if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
8122a6c8715SSebastian Neubauer         // Promote to next legal integer type.
8132a6c8715SSebastian Neubauer         unsigned Width = CmpType->getBitWidth();
8142a6c8715SSebastian Neubauer         unsigned NewWidth = Width;
8152a6c8715SSebastian Neubauer 
8162a6c8715SSebastian Neubauer         // Don't do anything for i1 comparisons.
8172a6c8715SSebastian Neubauer         if (Width == 1)
8182a6c8715SSebastian Neubauer           break;
8192a6c8715SSebastian Neubauer 
        // Round the operand width up to 16, 32 or 64 bits; anything wider
        // than 64 bits abandons the fold.
8202a6c8715SSebastian Neubauer         if (Width <= 16)
8212a6c8715SSebastian Neubauer           NewWidth = 16;
8222a6c8715SSebastian Neubauer         else if (Width <= 32)
8232a6c8715SSebastian Neubauer           NewWidth = 32;
8242a6c8715SSebastian Neubauer         else if (Width <= 64)
8252a6c8715SSebastian Neubauer           NewWidth = 64;
8262a6c8715SSebastian Neubauer         else if (Width > 64)
8272a6c8715SSebastian Neubauer           break; // Can't handle this.
8282a6c8715SSebastian Neubauer 
        // Widen both operands the same way: sign-extend for signed
        // predicates, zero-extend otherwise, so the comparison result is
        // unchanged at the wider width.
8292a6c8715SSebastian Neubauer         if (Width != NewWidth) {
8302a6c8715SSebastian Neubauer           IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
8312a6c8715SSebastian Neubauer           if (CmpInst::isSigned(SrcPred)) {
8322a6c8715SSebastian Neubauer             SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
8332a6c8715SSebastian Neubauer             SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
8342a6c8715SSebastian Neubauer           } else {
8352a6c8715SSebastian Neubauer             SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
8362a6c8715SSebastian Neubauer             SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
8372a6c8715SSebastian Neubauer           }
8382a6c8715SSebastian Neubauer         }
      // Non-integer operands are only accepted when they are float, double or
      // half; any other type (vectors, pointers, ...) abandons the fold.
8392a6c8715SSebastian Neubauer       } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
8402a6c8715SSebastian Neubauer         break;
8412a6c8715SSebastian Neubauer 
      // The new intrinsic is overloaded on the result type and on the
      // (possibly widened) operand type.
8422a6c8715SSebastian Neubauer       Function *NewF = Intrinsic::getDeclaration(
8432a6c8715SSebastian Neubauer           II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
8442a6c8715SSebastian Neubauer       Value *Args[] = {SrcLHS, SrcRHS,
8452a6c8715SSebastian Neubauer                        ConstantInt::get(CC->getType(), SrcPred)};
8462a6c8715SSebastian Neubauer       CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
8472a6c8715SSebastian Neubauer       NewCall->takeName(&II);
8482a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, NewCall);
8492a6c8715SSebastian Neubauer     }
8502a6c8715SSebastian Neubauer 
8512a6c8715SSebastian Neubauer     break;
8522a6c8715SSebastian Neubauer   }
8532a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_ballot: {
    // Fold ballot of a constant condition: a constant-false condition yields
    // zero, a constant-true condition yields the exec mask. Non-constant
    // conditions are left alone.
8542a6c8715SSebastian Neubauer     if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
8552a6c8715SSebastian Neubauer       if (Src->isZero()) {
8562a6c8715SSebastian Neubauer         // amdgcn.ballot(i1 0) is zero.
8572a6c8715SSebastian Neubauer         return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
8582a6c8715SSebastian Neubauer       }
8592a6c8715SSebastian Neubauer 
8602a6c8715SSebastian Neubauer       if (Src->isOne()) {
8612a6c8715SSebastian Neubauer         // amdgcn.ballot(i1 1) is exec.
        // Pick the register name from the result width: "exec_lo" for an i32
        // result, "exec" for an i64 result; any other width is not folded.
8622a6c8715SSebastian Neubauer         const char *RegName = "exec";
8632a6c8715SSebastian Neubauer         if (II.getType()->isIntegerTy(32))
8642a6c8715SSebastian Neubauer           RegName = "exec_lo";
8652a6c8715SSebastian Neubauer         else if (!II.getType()->isIntegerTy(64))
8662a6c8715SSebastian Neubauer           break;
8672a6c8715SSebastian Neubauer 
        // Build llvm.read_register(metadata !"<RegName>") with the same result
        // type as the ballot.
8682a6c8715SSebastian Neubauer         Function *NewF = Intrinsic::getDeclaration(
8692a6c8715SSebastian Neubauer             II.getModule(), Intrinsic::read_register, II.getType());
8702a6c8715SSebastian Neubauer         Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)};
8712a6c8715SSebastian Neubauer         MDNode *MD = MDNode::get(II.getContext(), MDArgs);
8722a6c8715SSebastian Neubauer         Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
8732a6c8715SSebastian Neubauer         CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
        // Mark the read as convergent at the call site.
8743f4d00bcSArthur Eubanks         NewCall->addFnAttr(Attribute::Convergent);
8752a6c8715SSebastian Neubauer         NewCall->takeName(&II);
8762a6c8715SSebastian Neubauer         return IC.replaceInstUsesWith(II, NewCall);
8772a6c8715SSebastian Neubauer       }
8782a6c8715SSebastian Neubauer     }
8792a6c8715SSebastian Neubauer     break;
8802a6c8715SSebastian Neubauer   }
8812a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_wqm_vote: {
8822a6c8715SSebastian Neubauer     // wqm_vote is identity when the argument is constant.
    // Any Constant operand qualifies (the check is isa<Constant>, not
    // ConstantInt); the call is replaced by that operand itself.
8832a6c8715SSebastian Neubauer     if (!isa<Constant>(II.getArgOperand(0)))
8842a6c8715SSebastian Neubauer       break;
8852a6c8715SSebastian Neubauer 
8862a6c8715SSebastian Neubauer     return IC.replaceInstUsesWith(II, II.getArgOperand(0));
8872a6c8715SSebastian Neubauer   }
8882a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_kill: {
    // Remove a kill whose condition is a non-zero integer constant. A
    // non-constant or constant-zero condition is left in place.
8892a6c8715SSebastian Neubauer     const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
8902a6c8715SSebastian Neubauer     if (!C || !C->getZExtValue())
8912a6c8715SSebastian Neubauer       break;
8922a6c8715SSebastian Neubauer 
8932a6c8715SSebastian Neubauer     // amdgcn.kill(i1 1) is a no-op
8942a6c8715SSebastian Neubauer     return IC.eraseInstFromFunction(II);
8952a6c8715SSebastian Neubauer   }
8962a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_update_dpp: {
    // Drop the dependency on the "old" value (operand 0) when it is not
    // needed. Operands read here: 3 = row mask (RM), 4 = bank mask (BM),
    // 5 = bound_ctrl (BC); all three are required to be constant integers.
8972a6c8715SSebastian Neubauer     Value *Old = II.getArgOperand(0);
8982a6c8715SSebastian Neubauer 
8992a6c8715SSebastian Neubauer     auto *BC = cast<ConstantInt>(II.getArgOperand(5));
9002a6c8715SSebastian Neubauer     auto *RM = cast<ConstantInt>(II.getArgOperand(3));
9012a6c8715SSebastian Neubauer     auto *BM = cast<ConstantInt>(II.getArgOperand(4));
    // Nothing to do unless bound_ctrl is set and both masks are 0xF; also
    // stop if the old value is already undef so the fold does not repeat.
9022a6c8715SSebastian Neubauer     if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
9032a6c8715SSebastian Neubauer         BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
9042a6c8715SSebastian Neubauer       break;
9052a6c8715SSebastian Neubauer 
9062a6c8715SSebastian Neubauer     // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
9072a6c8715SSebastian Neubauer     return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
9082a6c8715SSebastian Neubauer   }
9092a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_permlane16:
9102a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_permlanex16: {
9112a6c8715SSebastian Neubauer     // Discard vdst_in if it's not going to be read.
    // vdst_in is operand 0. Already-undef input means there is nothing left
    // to simplify (and prevents the fold from repeating).
9122a6c8715SSebastian Neubauer     Value *VDstIn = II.getArgOperand(0);
9132a6c8715SSebastian Neubauer     if (isa<UndefValue>(VDstIn))
9142a6c8715SSebastian Neubauer       break;
9152a6c8715SSebastian Neubauer 
    // Operands 4 and 5 are constant flags; vdst_in is kept only when both are
    // zero. If either is set, vdst_in is replaced with undef.
9162a6c8715SSebastian Neubauer     ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
9172a6c8715SSebastian Neubauer     ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
9182a6c8715SSebastian Neubauer     if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
9192a6c8715SSebastian Neubauer       break;
9202a6c8715SSebastian Neubauer 
9212a6c8715SSebastian Neubauer     return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
9222a6c8715SSebastian Neubauer   }
923bfcfd53bSJay Foad   case Intrinsic::amdgcn_permlane64:
924bfcfd53bSJay Foad     // A constant value is trivially uniform.
925bfcfd53bSJay Foad     if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
926bfcfd53bSJay Foad       return IC.replaceInstUsesWith(II, C);
927bfcfd53bSJay Foad     }
928bfcfd53bSJay Foad     break;
9292a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_readfirstlane:
9302a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_readlane: {
    // Folds for lane reads, in order:
    //   1. constant source -> the constant itself
    //   2. source is itself a readfirstlane/readlane in the same basic block
    //      -> reuse that earlier read instead of reading again
9312a6c8715SSebastian Neubauer     // A constant value is trivially uniform.
9322a6c8715SSebastian Neubauer     if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
9332a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, C);
9342a6c8715SSebastian Neubauer     }
9352a6c8715SSebastian Neubauer 
9362a6c8715SSebastian Neubauer     // The rest of these may not be safe if the exec may not be the same between
9372a6c8715SSebastian Neubauer     // the def and use.
    // Being in the same basic block is used as the condition for continuing;
    // a source defined in another block stops the fold. A source that is not
    // an instruction at all passes this check.
9382a6c8715SSebastian Neubauer     Value *Src = II.getArgOperand(0);
9392a6c8715SSebastian Neubauer     Instruction *SrcInst = dyn_cast<Instruction>(Src);
9402a6c8715SSebastian Neubauer     if (SrcInst && SrcInst->getParent() != II.getParent())
9412a6c8715SSebastian Neubauer       break;
9422a6c8715SSebastian Neubauer 
9432a6c8715SSebastian Neubauer     // readfirstlane (readfirstlane x) -> readfirstlane x
9442a6c8715SSebastian Neubauer     // readlane (readfirstlane x), y -> readfirstlane x
9452a6c8715SSebastian Neubauer     if (match(Src,
9462a6c8715SSebastian Neubauer               PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
9472a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, Src);
9482a6c8715SSebastian Neubauer     }
9492a6c8715SSebastian Neubauer 
9502a6c8715SSebastian Neubauer     if (IID == Intrinsic::amdgcn_readfirstlane) {
9512a6c8715SSebastian Neubauer       // readfirstlane (readlane x, y) -> readlane x, y
9522a6c8715SSebastian Neubauer       if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
9532a6c8715SSebastian Neubauer         return IC.replaceInstUsesWith(II, Src);
9542a6c8715SSebastian Neubauer       }
9552a6c8715SSebastian Neubauer     } else {
9562a6c8715SSebastian Neubauer       // readlane (readlane x, y), y -> readlane x, y
      // Only folded when the inner readlane uses the very same lane operand
      // (m_Specific on this call's operand 1).
9572a6c8715SSebastian Neubauer       if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
9582a6c8715SSebastian Neubauer                          PatternMatch::m_Value(),
9592a6c8715SSebastian Neubauer                          PatternMatch::m_Specific(II.getArgOperand(1))))) {
9602a6c8715SSebastian Neubauer         return IC.replaceInstUsesWith(II, Src);
9612a6c8715SSebastian Neubauer       }
9622a6c8715SSebastian Neubauer     }
9632a6c8715SSebastian Neubauer 
9642a6c8715SSebastian Neubauer     break;
9652a6c8715SSebastian Neubauer   }
9662a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_ldexp: {
9672a6c8715SSebastian Neubauer     // FIXME: This doesn't introduce new instructions and belongs in
9682a6c8715SSebastian Neubauer     // InstructionSimplify.
9692a6c8715SSebastian Neubauer     Type *Ty = II.getType();
9702a6c8715SSebastian Neubauer     Value *Op0 = II.getArgOperand(0);
9712a6c8715SSebastian Neubauer     Value *Op1 = II.getArgOperand(1);
9722a6c8715SSebastian Neubauer 
9732a6c8715SSebastian Neubauer     // Folding undef to qnan is safe regardless of the FP mode.
9742a6c8715SSebastian Neubauer     if (isa<UndefValue>(Op0)) {
9752a6c8715SSebastian Neubauer       auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
9762a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, QNaN);
9772a6c8715SSebastian Neubauer     }
9782a6c8715SSebastian Neubauer 
9792a6c8715SSebastian Neubauer     const APFloat *C = nullptr;
9802a6c8715SSebastian Neubauer     match(Op0, PatternMatch::m_APFloat(C));
9812a6c8715SSebastian Neubauer 
9822a6c8715SSebastian Neubauer     // FIXME: Should flush denorms depending on FP mode, but that's ignored
9832a6c8715SSebastian Neubauer     // everywhere else.
9842a6c8715SSebastian Neubauer     //
9852a6c8715SSebastian Neubauer     // These cases should be safe, even with strictfp.
9862a6c8715SSebastian Neubauer     // ldexp(0.0, x) -> 0.0
9872a6c8715SSebastian Neubauer     // ldexp(-0.0, x) -> -0.0
9882a6c8715SSebastian Neubauer     // ldexp(inf, x) -> inf
9892a6c8715SSebastian Neubauer     // ldexp(-inf, x) -> -inf
9902a6c8715SSebastian Neubauer     if (C && (C->isZero() || C->isInfinity())) {
9912a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, Op0);
9922a6c8715SSebastian Neubauer     }
9932a6c8715SSebastian Neubauer 
9942a6c8715SSebastian Neubauer     // With strictfp, be more careful about possibly needing to flush denormals
9952a6c8715SSebastian Neubauer     // or not, and snan behavior depends on ieee_mode.
9962a6c8715SSebastian Neubauer     if (II.isStrictFP())
9972a6c8715SSebastian Neubauer       break;
9982a6c8715SSebastian Neubauer 
9992a6c8715SSebastian Neubauer     if (C && C->isNaN()) {
10002a6c8715SSebastian Neubauer       // FIXME: We just need to make the nan quiet here, but that's unavailable
10012a6c8715SSebastian Neubauer       // on APFloat, only IEEEfloat
10022a6c8715SSebastian Neubauer       auto *Quieted =
10032a6c8715SSebastian Neubauer           ConstantFP::get(Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven));
10042a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, Quieted);
10052a6c8715SSebastian Neubauer     }
10062a6c8715SSebastian Neubauer 
10072a6c8715SSebastian Neubauer     // ldexp(x, 0) -> x
10082a6c8715SSebastian Neubauer     // ldexp(x, undef) -> x
10092a6c8715SSebastian Neubauer     if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
10102a6c8715SSebastian Neubauer       return IC.replaceInstUsesWith(II, Op0);
10112a6c8715SSebastian Neubauer     }
10122a6c8715SSebastian Neubauer 
10132a6c8715SSebastian Neubauer     break;
10142a6c8715SSebastian Neubauer   }
101586a480e9SJay Foad   case Intrinsic::amdgcn_fmul_legacy: {
101686a480e9SJay Foad     Value *Op0 = II.getArgOperand(0);
101786a480e9SJay Foad     Value *Op1 = II.getArgOperand(1);
101886a480e9SJay Foad 
101986a480e9SJay Foad     // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
102086a480e9SJay Foad     // infinity, gives +0.0.
102186a480e9SJay Foad     // TODO: Move to InstSimplify?
102286a480e9SJay Foad     if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
102386a480e9SJay Foad         match(Op1, PatternMatch::m_AnyZeroFP()))
102486a480e9SJay Foad       return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));
102586a480e9SJay Foad 
102686a480e9SJay Foad     // If we can prove we don't have one of the special cases then we can use a
102786a480e9SJay Foad     // normal fmul instruction instead.
1028958130dfSJay Foad     if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
102986a480e9SJay Foad       auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
103086a480e9SJay Foad       FMul->takeName(&II);
103186a480e9SJay Foad       return IC.replaceInstUsesWith(II, FMul);
103286a480e9SJay Foad     }
103386a480e9SJay Foad     break;
103486a480e9SJay Foad   }
1035958130dfSJay Foad   case Intrinsic::amdgcn_fma_legacy: {
1036958130dfSJay Foad     Value *Op0 = II.getArgOperand(0);
1037958130dfSJay Foad     Value *Op1 = II.getArgOperand(1);
1038958130dfSJay Foad     Value *Op2 = II.getArgOperand(2);
1039958130dfSJay Foad 
1040958130dfSJay Foad     // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
1041958130dfSJay Foad     // infinity, gives +0.0.
1042958130dfSJay Foad     // TODO: Move to InstSimplify?
1043958130dfSJay Foad     if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
1044958130dfSJay Foad         match(Op1, PatternMatch::m_AnyZeroFP())) {
1045958130dfSJay Foad       // It's tempting to just return Op2 here, but that would give the wrong
1046958130dfSJay Foad       // result if Op2 was -0.0.
1047958130dfSJay Foad       auto *Zero = ConstantFP::getNullValue(II.getType());
1048958130dfSJay Foad       auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
1049958130dfSJay Foad       FAdd->takeName(&II);
1050958130dfSJay Foad       return IC.replaceInstUsesWith(II, FAdd);
1051958130dfSJay Foad     }
1052958130dfSJay Foad 
1053958130dfSJay Foad     // If we can prove we don't have one of the special cases then we can use a
1054958130dfSJay Foad     // normal fma instead.
1055958130dfSJay Foad     if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
1056958130dfSJay Foad       II.setCalledOperand(Intrinsic::getDeclaration(
1057958130dfSJay Foad           II.getModule(), Intrinsic::fma, II.getType()));
1058958130dfSJay Foad       return &II;
1059958130dfSJay Foad     }
1060958130dfSJay Foad     break;
1061958130dfSJay Foad   }
106245f16eabSMatt Arsenault   case Intrinsic::amdgcn_is_shared:
106345f16eabSMatt Arsenault   case Intrinsic::amdgcn_is_private: {
106445f16eabSMatt Arsenault     if (isa<UndefValue>(II.getArgOperand(0)))
106545f16eabSMatt Arsenault       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
106645f16eabSMatt Arsenault 
106745f16eabSMatt Arsenault     if (isa<ConstantPointerNull>(II.getArgOperand(0)))
106845f16eabSMatt Arsenault       return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
106945f16eabSMatt Arsenault     break;
107045f16eabSMatt Arsenault   }
1071b8d19947SSebastian Neubauer   default: {
1072b8d19947SSebastian Neubauer     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
1073b8d19947SSebastian Neubauer             AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
1074b8d19947SSebastian Neubauer       return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
1075b8d19947SSebastian Neubauer     }
1076b8d19947SSebastian Neubauer   }
10772a6c8715SSebastian Neubauer   }
10782a6c8715SSebastian Neubauer   return None;
10792a6c8715SSebastian Neubauer }
10802a6c8715SSebastian Neubauer 
10812a6c8715SSebastian Neubauer /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
10822a6c8715SSebastian Neubauer ///
10832a6c8715SSebastian Neubauer /// Note: This only supports non-TFE/LWE image intrinsic calls; those have
10842a6c8715SSebastian Neubauer ///       struct returns.
simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner & IC,IntrinsicInst & II,APInt DemandedElts,int DMaskIdx=-1)1085c6f08b14SBenjamin Kramer static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
10862a6c8715SSebastian Neubauer                                                     IntrinsicInst &II,
10872a6c8715SSebastian Neubauer                                                     APInt DemandedElts,
10882a6c8715SSebastian Neubauer                                                     int DMaskIdx = -1) {
10892a6c8715SSebastian Neubauer 
10903b92db4cSChristopher Tetreault   auto *IIVTy = cast<FixedVectorType>(II.getType());
10912a6c8715SSebastian Neubauer   unsigned VWidth = IIVTy->getNumElements();
10922a6c8715SSebastian Neubauer   if (VWidth == 1)
10932a6c8715SSebastian Neubauer     return nullptr;
10942a6c8715SSebastian Neubauer 
10952a6c8715SSebastian Neubauer   IRBuilderBase::InsertPointGuard Guard(IC.Builder);
10962a6c8715SSebastian Neubauer   IC.Builder.SetInsertPoint(&II);
10972a6c8715SSebastian Neubauer 
10982a6c8715SSebastian Neubauer   // Assume the arguments are unchanged and later override them, if needed.
10990e219b64SKazu Hirata   SmallVector<Value *, 16> Args(II.args());
11002a6c8715SSebastian Neubauer 
11012a6c8715SSebastian Neubauer   if (DMaskIdx < 0) {
11022a6c8715SSebastian Neubauer     // Buffer case.
11032a6c8715SSebastian Neubauer 
11042a6c8715SSebastian Neubauer     const unsigned ActiveBits = DemandedElts.getActiveBits();
11052a6c8715SSebastian Neubauer     const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
11062a6c8715SSebastian Neubauer 
11072a6c8715SSebastian Neubauer     // Start assuming the prefix of elements is demanded, but possibly clear
11082a6c8715SSebastian Neubauer     // some other bits if there are trailing zeros (unused components at front)
11092a6c8715SSebastian Neubauer     // and update offset.
11102a6c8715SSebastian Neubauer     DemandedElts = (1 << ActiveBits) - 1;
11112a6c8715SSebastian Neubauer 
11122a6c8715SSebastian Neubauer     if (UnusedComponentsAtFront > 0) {
11132a6c8715SSebastian Neubauer       static const unsigned InvalidOffsetIdx = 0xf;
11142a6c8715SSebastian Neubauer 
11152a6c8715SSebastian Neubauer       unsigned OffsetIdx;
11162a6c8715SSebastian Neubauer       switch (II.getIntrinsicID()) {
11172a6c8715SSebastian Neubauer       case Intrinsic::amdgcn_raw_buffer_load:
11182a6c8715SSebastian Neubauer         OffsetIdx = 1;
11192a6c8715SSebastian Neubauer         break;
11202a6c8715SSebastian Neubauer       case Intrinsic::amdgcn_s_buffer_load:
11212a6c8715SSebastian Neubauer         // If resulting type is vec3, there is no point in trimming the
11222a6c8715SSebastian Neubauer         // load with updated offset, as the vec3 would most likely be widened to
11232a6c8715SSebastian Neubauer         // vec4 anyway during lowering.
11242a6c8715SSebastian Neubauer         if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
11252a6c8715SSebastian Neubauer           OffsetIdx = InvalidOffsetIdx;
11262a6c8715SSebastian Neubauer         else
11272a6c8715SSebastian Neubauer           OffsetIdx = 1;
11282a6c8715SSebastian Neubauer         break;
11292a6c8715SSebastian Neubauer       case Intrinsic::amdgcn_struct_buffer_load:
11302a6c8715SSebastian Neubauer         OffsetIdx = 2;
11312a6c8715SSebastian Neubauer         break;
11322a6c8715SSebastian Neubauer       default:
11332a6c8715SSebastian Neubauer         // TODO: handle tbuffer* intrinsics.
11342a6c8715SSebastian Neubauer         OffsetIdx = InvalidOffsetIdx;
11352a6c8715SSebastian Neubauer         break;
11362a6c8715SSebastian Neubauer       }
11372a6c8715SSebastian Neubauer 
11382a6c8715SSebastian Neubauer       if (OffsetIdx != InvalidOffsetIdx) {
11392a6c8715SSebastian Neubauer         // Clear demanded bits and update the offset.
11402a6c8715SSebastian Neubauer         DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
11412a6c8715SSebastian Neubauer         auto *Offset = II.getArgOperand(OffsetIdx);
11422a6c8715SSebastian Neubauer         unsigned SingleComponentSizeInBits =
11432a6c8715SSebastian Neubauer             IC.getDataLayout().getTypeSizeInBits(II.getType()->getScalarType());
11442a6c8715SSebastian Neubauer         unsigned OffsetAdd =
11452a6c8715SSebastian Neubauer             UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
11462a6c8715SSebastian Neubauer         auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
11472a6c8715SSebastian Neubauer         Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
11482a6c8715SSebastian Neubauer       }
11492a6c8715SSebastian Neubauer     }
11502a6c8715SSebastian Neubauer   } else {
11512a6c8715SSebastian Neubauer     // Image case.
11522a6c8715SSebastian Neubauer 
11532a6c8715SSebastian Neubauer     ConstantInt *DMask = cast<ConstantInt>(II.getArgOperand(DMaskIdx));
11542a6c8715SSebastian Neubauer     unsigned DMaskVal = DMask->getZExtValue() & 0xf;
11552a6c8715SSebastian Neubauer 
11562a6c8715SSebastian Neubauer     // Mask off values that are undefined because the dmask doesn't cover them
11572a6c8715SSebastian Neubauer     DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
11582a6c8715SSebastian Neubauer 
11592a6c8715SSebastian Neubauer     unsigned NewDMaskVal = 0;
11602a6c8715SSebastian Neubauer     unsigned OrigLoadIdx = 0;
11612a6c8715SSebastian Neubauer     for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
11622a6c8715SSebastian Neubauer       const unsigned Bit = 1 << SrcIdx;
11632a6c8715SSebastian Neubauer       if (!!(DMaskVal & Bit)) {
11642a6c8715SSebastian Neubauer         if (!!DemandedElts[OrigLoadIdx])
11652a6c8715SSebastian Neubauer           NewDMaskVal |= Bit;
11662a6c8715SSebastian Neubauer         OrigLoadIdx++;
11672a6c8715SSebastian Neubauer       }
11682a6c8715SSebastian Neubauer     }
11692a6c8715SSebastian Neubauer 
11702a6c8715SSebastian Neubauer     if (DMaskVal != NewDMaskVal)
11712a6c8715SSebastian Neubauer       Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
11722a6c8715SSebastian Neubauer   }
11732a6c8715SSebastian Neubauer 
11742a6c8715SSebastian Neubauer   unsigned NewNumElts = DemandedElts.countPopulation();
11752a6c8715SSebastian Neubauer   if (!NewNumElts)
11762a6c8715SSebastian Neubauer     return UndefValue::get(II.getType());
11772a6c8715SSebastian Neubauer 
11782a6c8715SSebastian Neubauer   if (NewNumElts >= VWidth && DemandedElts.isMask()) {
11792a6c8715SSebastian Neubauer     if (DMaskIdx >= 0)
11802a6c8715SSebastian Neubauer       II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
11812a6c8715SSebastian Neubauer     return nullptr;
11822a6c8715SSebastian Neubauer   }
11832a6c8715SSebastian Neubauer 
11842a6c8715SSebastian Neubauer   // Validate function argument and return types, extracting overloaded types
11852a6c8715SSebastian Neubauer   // along the way.
11862a6c8715SSebastian Neubauer   SmallVector<Type *, 6> OverloadTys;
11872a6c8715SSebastian Neubauer   if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
11882a6c8715SSebastian Neubauer     return nullptr;
11892a6c8715SSebastian Neubauer 
11902a6c8715SSebastian Neubauer   Module *M = II.getParent()->getParent()->getParent();
11912a6c8715SSebastian Neubauer   Type *EltTy = IIVTy->getElementType();
11922a6c8715SSebastian Neubauer   Type *NewTy =
11932a6c8715SSebastian Neubauer       (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
11942a6c8715SSebastian Neubauer 
11952a6c8715SSebastian Neubauer   OverloadTys[0] = NewTy;
11962a6c8715SSebastian Neubauer   Function *NewIntrin =
11972a6c8715SSebastian Neubauer       Intrinsic::getDeclaration(M, II.getIntrinsicID(), OverloadTys);
11982a6c8715SSebastian Neubauer 
11992a6c8715SSebastian Neubauer   CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
12002a6c8715SSebastian Neubauer   NewCall->takeName(&II);
12012a6c8715SSebastian Neubauer   NewCall->copyMetadata(II);
12022a6c8715SSebastian Neubauer 
12032a6c8715SSebastian Neubauer   if (NewNumElts == 1) {
12042a6c8715SSebastian Neubauer     return IC.Builder.CreateInsertElement(UndefValue::get(II.getType()),
12052a6c8715SSebastian Neubauer                                           NewCall,
12062a6c8715SSebastian Neubauer                                           DemandedElts.countTrailingZeros());
12072a6c8715SSebastian Neubauer   }
12082a6c8715SSebastian Neubauer 
12092a6c8715SSebastian Neubauer   SmallVector<int, 8> EltMask;
12102a6c8715SSebastian Neubauer   unsigned NewLoadIdx = 0;
12112a6c8715SSebastian Neubauer   for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
12122a6c8715SSebastian Neubauer     if (!!DemandedElts[OrigLoadIdx])
12132a6c8715SSebastian Neubauer       EltMask.push_back(NewLoadIdx++);
12142a6c8715SSebastian Neubauer     else
12152a6c8715SSebastian Neubauer       EltMask.push_back(NewNumElts);
12162a6c8715SSebastian Neubauer   }
12172a6c8715SSebastian Neubauer 
12189b296102SJuneyoung Lee   Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
12192a6c8715SSebastian Neubauer 
12202a6c8715SSebastian Neubauer   return Shuffle;
12212a6c8715SSebastian Neubauer }
12222a6c8715SSebastian Neubauer 
simplifyDemandedVectorEltsIntrinsic(InstCombiner & IC,IntrinsicInst & II,APInt DemandedElts,APInt & UndefElts,APInt & UndefElts2,APInt & UndefElts3,std::function<void (Instruction *,unsigned,APInt,APInt &)> SimplifyAndSetOp) const12232a6c8715SSebastian Neubauer Optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
12242a6c8715SSebastian Neubauer     InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
12252a6c8715SSebastian Neubauer     APInt &UndefElts2, APInt &UndefElts3,
12262a6c8715SSebastian Neubauer     std::function<void(Instruction *, unsigned, APInt, APInt &)>
12272a6c8715SSebastian Neubauer         SimplifyAndSetOp) const {
12282a6c8715SSebastian Neubauer   switch (II.getIntrinsicID()) {
12292a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_buffer_load:
12302a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_buffer_load_format:
12312a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_raw_buffer_load:
12322a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_raw_buffer_load_format:
12332a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_raw_tbuffer_load:
12342a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_s_buffer_load:
12352a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_struct_buffer_load:
12362a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_struct_buffer_load_format:
12372a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_struct_tbuffer_load:
12382a6c8715SSebastian Neubauer   case Intrinsic::amdgcn_tbuffer_load:
12392a6c8715SSebastian Neubauer     return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
12402a6c8715SSebastian Neubauer   default: {
12412a6c8715SSebastian Neubauer     if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
12422a6c8715SSebastian Neubauer       return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
12432a6c8715SSebastian Neubauer     }
12442a6c8715SSebastian Neubauer     break;
12452a6c8715SSebastian Neubauer   }
12462a6c8715SSebastian Neubauer   }
12472a6c8715SSebastian Neubauer   return None;
12482a6c8715SSebastian Neubauer }
1249