//===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// This file implements a TargetTransformInfo analysis pass specific to the
// AMDGPU target machine. It uses the target's detailed information to provide
// more precise answers to certain TTI queries, while letting the target
// independent and default TTI implementations handle the rest.
//
//===----------------------------------------------------------------------===//
162a6c8715SSebastian Neubauer
176a87e9b0Sdfukalov #include "AMDGPUInstrInfo.h"
182a6c8715SSebastian Neubauer #include "AMDGPUTargetTransformInfo.h"
19560d7e04Sdfukalov #include "GCNSubtarget.h"
206a87e9b0Sdfukalov #include "llvm/IR/IntrinsicsAMDGPU.h"
212a6c8715SSebastian Neubauer #include "llvm/Transforms/InstCombine/InstCombiner.h"
222a6c8715SSebastian Neubauer
232a6c8715SSebastian Neubauer using namespace llvm;
242a6c8715SSebastian Neubauer
252a6c8715SSebastian Neubauer #define DEBUG_TYPE "AMDGPUtti"
262a6c8715SSebastian Neubauer
272a6c8715SSebastian Neubauer namespace {
282a6c8715SSebastian Neubauer
292a6c8715SSebastian Neubauer struct AMDGPUImageDMaskIntrinsic {
302a6c8715SSebastian Neubauer unsigned Intr;
312a6c8715SSebastian Neubauer };
322a6c8715SSebastian Neubauer
332a6c8715SSebastian Neubauer #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
342a6c8715SSebastian Neubauer #include "InstCombineTables.inc"
352a6c8715SSebastian Neubauer
362a6c8715SSebastian Neubauer } // end anonymous namespace
372a6c8715SSebastian Neubauer
382a6c8715SSebastian Neubauer // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
392a6c8715SSebastian Neubauer //
402a6c8715SSebastian Neubauer // A single NaN input is folded to minnum, so we rely on that folding for
412a6c8715SSebastian Neubauer // handling NaNs.
fmed3AMDGCN(const APFloat & Src0,const APFloat & Src1,const APFloat & Src2)422a6c8715SSebastian Neubauer static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
432a6c8715SSebastian Neubauer const APFloat &Src2) {
442a6c8715SSebastian Neubauer APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
452a6c8715SSebastian Neubauer
462a6c8715SSebastian Neubauer APFloat::cmpResult Cmp0 = Max3.compare(Src0);
472a6c8715SSebastian Neubauer assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
482a6c8715SSebastian Neubauer if (Cmp0 == APFloat::cmpEqual)
492a6c8715SSebastian Neubauer return maxnum(Src1, Src2);
502a6c8715SSebastian Neubauer
512a6c8715SSebastian Neubauer APFloat::cmpResult Cmp1 = Max3.compare(Src1);
522a6c8715SSebastian Neubauer assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
532a6c8715SSebastian Neubauer if (Cmp1 == APFloat::cmpEqual)
542a6c8715SSebastian Neubauer return maxnum(Src0, Src2);
552a6c8715SSebastian Neubauer
562a6c8715SSebastian Neubauer return maxnum(Src0, Src1);
572a6c8715SSebastian Neubauer }
582a6c8715SSebastian Neubauer
59b8d19947SSebastian Neubauer // Check if a value can be converted to a 16-bit value without losing
60b8d19947SSebastian Neubauer // precision.
614ed7c6eeSSebastian Neubauer // The value is expected to be either a float (IsFloat = true) or an unsigned
624ed7c6eeSSebastian Neubauer // integer (IsFloat = false).
canSafelyConvertTo16Bit(Value & V,bool IsFloat)634ed7c6eeSSebastian Neubauer static bool canSafelyConvertTo16Bit(Value &V, bool IsFloat) {
64b8d19947SSebastian Neubauer Type *VTy = V.getType();
65b8d19947SSebastian Neubauer if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
66b8d19947SSebastian Neubauer // The value is already 16-bit, so we don't want to convert to 16-bit again!
67b8d19947SSebastian Neubauer return false;
68b8d19947SSebastian Neubauer }
694ed7c6eeSSebastian Neubauer if (IsFloat) {
70b8d19947SSebastian Neubauer if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
714ed7c6eeSSebastian Neubauer // We need to check that if we cast the index down to a half, we do not
724ed7c6eeSSebastian Neubauer // lose precision.
73b8d19947SSebastian Neubauer APFloat FloatValue(ConstFloat->getValueAPF());
74b8d19947SSebastian Neubauer bool LosesInfo = true;
754ed7c6eeSSebastian Neubauer FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero,
764ed7c6eeSSebastian Neubauer &LosesInfo);
77b8d19947SSebastian Neubauer return !LosesInfo;
78b8d19947SSebastian Neubauer }
794ed7c6eeSSebastian Neubauer } else {
804ed7c6eeSSebastian Neubauer if (ConstantInt *ConstInt = dyn_cast<ConstantInt>(&V)) {
814ed7c6eeSSebastian Neubauer // We need to check that if we cast the index down to an i16, we do not
824ed7c6eeSSebastian Neubauer // lose precision.
834ed7c6eeSSebastian Neubauer APInt IntValue(ConstInt->getValue());
844ed7c6eeSSebastian Neubauer return IntValue.getActiveBits() <= 16;
854ed7c6eeSSebastian Neubauer }
864ed7c6eeSSebastian Neubauer }
874ed7c6eeSSebastian Neubauer
88b8d19947SSebastian Neubauer Value *CastSrc;
894ed7c6eeSSebastian Neubauer bool IsExt = IsFloat ? match(&V, m_FPExt(PatternMatch::m_Value(CastSrc)))
904ed7c6eeSSebastian Neubauer : match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)));
914ed7c6eeSSebastian Neubauer if (IsExt) {
92b8d19947SSebastian Neubauer Type *CastSrcTy = CastSrc->getType();
93b8d19947SSebastian Neubauer if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
94b8d19947SSebastian Neubauer return true;
95b8d19947SSebastian Neubauer }
96b8d19947SSebastian Neubauer
97b8d19947SSebastian Neubauer return false;
98b8d19947SSebastian Neubauer }
99b8d19947SSebastian Neubauer
100b8d19947SSebastian Neubauer // Convert a value to 16-bit.
convertTo16Bit(Value & V,InstCombiner::BuilderTy & Builder)10120e9c36cSFangrui Song static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
102b8d19947SSebastian Neubauer Type *VTy = V.getType();
103b8d19947SSebastian Neubauer if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
104b8d19947SSebastian Neubauer return cast<Instruction>(&V)->getOperand(0);
105b8d19947SSebastian Neubauer if (VTy->isIntegerTy())
106b8d19947SSebastian Neubauer return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
107b8d19947SSebastian Neubauer if (VTy->isFloatingPointTy())
108b8d19947SSebastian Neubauer return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
109b8d19947SSebastian Neubauer
110b8d19947SSebastian Neubauer llvm_unreachable("Should never be called!");
111b8d19947SSebastian Neubauer }
112b8d19947SSebastian Neubauer
1132417de27SMariusz Sikora /// Applies Func(OldIntr.Args, OldIntr.ArgTys), creates intrinsic call with
1142417de27SMariusz Sikora /// modified arguments (based on OldIntr) and replaces InstToReplace with
1152417de27SMariusz Sikora /// this newly created intrinsic call.
modifyIntrinsicCall(IntrinsicInst & OldIntr,Instruction & InstToReplace,unsigned NewIntr,InstCombiner & IC,std::function<void (SmallVectorImpl<Value * > &,SmallVectorImpl<Type * > &)> Func)116603d1803SSebastian Neubauer static Optional<Instruction *> modifyIntrinsicCall(
1172417de27SMariusz Sikora IntrinsicInst &OldIntr, Instruction &InstToReplace, unsigned NewIntr,
1182417de27SMariusz Sikora InstCombiner &IC,
119603d1803SSebastian Neubauer std::function<void(SmallVectorImpl<Value *> &, SmallVectorImpl<Type *> &)>
120603d1803SSebastian Neubauer Func) {
121603d1803SSebastian Neubauer SmallVector<Type *, 4> ArgTys;
1222417de27SMariusz Sikora if (!Intrinsic::getIntrinsicSignature(OldIntr.getCalledFunction(), ArgTys))
123603d1803SSebastian Neubauer return None;
124603d1803SSebastian Neubauer
1252417de27SMariusz Sikora SmallVector<Value *, 8> Args(OldIntr.args());
126603d1803SSebastian Neubauer
127603d1803SSebastian Neubauer // Modify arguments and types
128603d1803SSebastian Neubauer Func(Args, ArgTys);
129603d1803SSebastian Neubauer
1302417de27SMariusz Sikora Function *I = Intrinsic::getDeclaration(OldIntr.getModule(), NewIntr, ArgTys);
131603d1803SSebastian Neubauer
132603d1803SSebastian Neubauer CallInst *NewCall = IC.Builder.CreateCall(I, Args);
1332417de27SMariusz Sikora NewCall->takeName(&OldIntr);
1342417de27SMariusz Sikora NewCall->copyMetadata(OldIntr);
135603d1803SSebastian Neubauer if (isa<FPMathOperator>(NewCall))
1362417de27SMariusz Sikora NewCall->copyFastMathFlags(&OldIntr);
137603d1803SSebastian Neubauer
138603d1803SSebastian Neubauer // Erase and replace uses
1392417de27SMariusz Sikora if (!InstToReplace.getType()->isVoidTy())
1402417de27SMariusz Sikora IC.replaceInstUsesWith(InstToReplace, NewCall);
1412417de27SMariusz Sikora
1422417de27SMariusz Sikora bool RemoveOldIntr = &OldIntr != &InstToReplace;
1432417de27SMariusz Sikora
1442417de27SMariusz Sikora auto RetValue = IC.eraseInstFromFunction(InstToReplace);
1452417de27SMariusz Sikora if (RemoveOldIntr)
1462417de27SMariusz Sikora IC.eraseInstFromFunction(OldIntr);
1472417de27SMariusz Sikora
1482417de27SMariusz Sikora return RetValue;
149603d1803SSebastian Neubauer }
150603d1803SSebastian Neubauer
151b8d19947SSebastian Neubauer static Optional<Instruction *>
simplifyAMDGCNImageIntrinsic(const GCNSubtarget * ST,const AMDGPU::ImageDimIntrinsicInfo * ImageDimIntr,IntrinsicInst & II,InstCombiner & IC)152b8d19947SSebastian Neubauer simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
153b8d19947SSebastian Neubauer const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
154b8d19947SSebastian Neubauer IntrinsicInst &II, InstCombiner &IC) {
155603d1803SSebastian Neubauer // Optimize _L to _LZ when _L is zero
156603d1803SSebastian Neubauer if (const auto *LZMappingInfo =
157603d1803SSebastian Neubauer AMDGPU::getMIMGLZMappingInfo(ImageDimIntr->BaseOpcode)) {
158603d1803SSebastian Neubauer if (auto *ConstantLod =
159603d1803SSebastian Neubauer dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->LodIndex))) {
160603d1803SSebastian Neubauer if (ConstantLod->isZero() || ConstantLod->isNegative()) {
161603d1803SSebastian Neubauer const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
162603d1803SSebastian Neubauer AMDGPU::getImageDimIntrinsicByBaseOpcode(LZMappingInfo->LZ,
163603d1803SSebastian Neubauer ImageDimIntr->Dim);
164603d1803SSebastian Neubauer return modifyIntrinsicCall(
1652417de27SMariusz Sikora II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
166603d1803SSebastian Neubauer Args.erase(Args.begin() + ImageDimIntr->LodIndex);
167603d1803SSebastian Neubauer });
168603d1803SSebastian Neubauer }
169603d1803SSebastian Neubauer }
170603d1803SSebastian Neubauer }
171603d1803SSebastian Neubauer
172603d1803SSebastian Neubauer // Optimize _mip away, when 'lod' is zero
173603d1803SSebastian Neubauer if (const auto *MIPMappingInfo =
174603d1803SSebastian Neubauer AMDGPU::getMIMGMIPMappingInfo(ImageDimIntr->BaseOpcode)) {
175603d1803SSebastian Neubauer if (auto *ConstantMip =
176603d1803SSebastian Neubauer dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->MipIndex))) {
177603d1803SSebastian Neubauer if (ConstantMip->isZero()) {
178603d1803SSebastian Neubauer const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
179603d1803SSebastian Neubauer AMDGPU::getImageDimIntrinsicByBaseOpcode(MIPMappingInfo->NONMIP,
180603d1803SSebastian Neubauer ImageDimIntr->Dim);
181603d1803SSebastian Neubauer return modifyIntrinsicCall(
1822417de27SMariusz Sikora II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
183603d1803SSebastian Neubauer Args.erase(Args.begin() + ImageDimIntr->MipIndex);
184603d1803SSebastian Neubauer });
185603d1803SSebastian Neubauer }
186603d1803SSebastian Neubauer }
187603d1803SSebastian Neubauer }
188603d1803SSebastian Neubauer
189603d1803SSebastian Neubauer // Optimize _bias away when 'bias' is zero
190603d1803SSebastian Neubauer if (const auto *BiasMappingInfo =
191603d1803SSebastian Neubauer AMDGPU::getMIMGBiasMappingInfo(ImageDimIntr->BaseOpcode)) {
192603d1803SSebastian Neubauer if (auto *ConstantBias =
193603d1803SSebastian Neubauer dyn_cast<ConstantFP>(II.getOperand(ImageDimIntr->BiasIndex))) {
194603d1803SSebastian Neubauer if (ConstantBias->isZero()) {
195603d1803SSebastian Neubauer const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
196603d1803SSebastian Neubauer AMDGPU::getImageDimIntrinsicByBaseOpcode(BiasMappingInfo->NoBias,
197603d1803SSebastian Neubauer ImageDimIntr->Dim);
198603d1803SSebastian Neubauer return modifyIntrinsicCall(
1992417de27SMariusz Sikora II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
200603d1803SSebastian Neubauer Args.erase(Args.begin() + ImageDimIntr->BiasIndex);
201603d1803SSebastian Neubauer ArgTys.erase(ArgTys.begin() + ImageDimIntr->BiasTyArg);
202603d1803SSebastian Neubauer });
203603d1803SSebastian Neubauer }
204603d1803SSebastian Neubauer }
205603d1803SSebastian Neubauer }
206603d1803SSebastian Neubauer
20780532ebbSSebastian Neubauer // Optimize _offset away when 'offset' is zero
20880532ebbSSebastian Neubauer if (const auto *OffsetMappingInfo =
20980532ebbSSebastian Neubauer AMDGPU::getMIMGOffsetMappingInfo(ImageDimIntr->BaseOpcode)) {
21080532ebbSSebastian Neubauer if (auto *ConstantOffset =
21180532ebbSSebastian Neubauer dyn_cast<ConstantInt>(II.getOperand(ImageDimIntr->OffsetIndex))) {
21280532ebbSSebastian Neubauer if (ConstantOffset->isZero()) {
21380532ebbSSebastian Neubauer const AMDGPU::ImageDimIntrinsicInfo *NewImageDimIntr =
21480532ebbSSebastian Neubauer AMDGPU::getImageDimIntrinsicByBaseOpcode(
21580532ebbSSebastian Neubauer OffsetMappingInfo->NoOffset, ImageDimIntr->Dim);
21680532ebbSSebastian Neubauer return modifyIntrinsicCall(
2172417de27SMariusz Sikora II, II, NewImageDimIntr->Intr, IC, [&](auto &Args, auto &ArgTys) {
21880532ebbSSebastian Neubauer Args.erase(Args.begin() + ImageDimIntr->OffsetIndex);
21980532ebbSSebastian Neubauer });
22080532ebbSSebastian Neubauer }
22180532ebbSSebastian Neubauer }
22280532ebbSSebastian Neubauer }
22380532ebbSSebastian Neubauer
2242417de27SMariusz Sikora // Try to use D16
2252417de27SMariusz Sikora if (ST->hasD16Images()) {
2262417de27SMariusz Sikora
2272417de27SMariusz Sikora const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
2282417de27SMariusz Sikora AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode);
2292417de27SMariusz Sikora
2302417de27SMariusz Sikora if (BaseOpcode->HasD16) {
2312417de27SMariusz Sikora
2322417de27SMariusz Sikora // If the only use of image intrinsic is a fptrunc (with conversion to
2332417de27SMariusz Sikora // half) then both fptrunc and image intrinsic will be replaced with image
2342417de27SMariusz Sikora // intrinsic with D16 flag.
2352417de27SMariusz Sikora if (II.hasOneUse()) {
2362417de27SMariusz Sikora Instruction *User = II.user_back();
2372417de27SMariusz Sikora
2382417de27SMariusz Sikora if (User->getOpcode() == Instruction::FPTrunc &&
2392417de27SMariusz Sikora User->getType()->getScalarType()->isHalfTy()) {
2402417de27SMariusz Sikora
2412417de27SMariusz Sikora return modifyIntrinsicCall(II, *User, ImageDimIntr->Intr, IC,
2422417de27SMariusz Sikora [&](auto &Args, auto &ArgTys) {
2432417de27SMariusz Sikora // Change return type of image intrinsic.
2442417de27SMariusz Sikora // Set it to return type of fptrunc.
2452417de27SMariusz Sikora ArgTys[0] = User->getType();
2462417de27SMariusz Sikora });
2472417de27SMariusz Sikora }
2482417de27SMariusz Sikora }
2492417de27SMariusz Sikora }
2502417de27SMariusz Sikora }
2512417de27SMariusz Sikora
252603d1803SSebastian Neubauer // Try to use A16 or G16
253b8d19947SSebastian Neubauer if (!ST->hasA16() && !ST->hasG16())
254b8d19947SSebastian Neubauer return None;
255b8d19947SSebastian Neubauer
2564ed7c6eeSSebastian Neubauer // Address is interpreted as float if the instruction has a sampler or as
2574ed7c6eeSSebastian Neubauer // unsigned int if there is no sampler.
2584ed7c6eeSSebastian Neubauer bool HasSampler =
2594ed7c6eeSSebastian Neubauer AMDGPU::getMIMGBaseOpcodeInfo(ImageDimIntr->BaseOpcode)->Sampler;
260b8d19947SSebastian Neubauer bool FloatCoord = false;
261b8d19947SSebastian Neubauer // true means derivatives can be converted to 16 bit, coordinates not
262b8d19947SSebastian Neubauer bool OnlyDerivatives = false;
263b8d19947SSebastian Neubauer
264b8d19947SSebastian Neubauer for (unsigned OperandIndex = ImageDimIntr->GradientStart;
265b8d19947SSebastian Neubauer OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
266b8d19947SSebastian Neubauer Value *Coord = II.getOperand(OperandIndex);
267b8d19947SSebastian Neubauer // If the values are not derived from 16-bit values, we cannot optimize.
2684ed7c6eeSSebastian Neubauer if (!canSafelyConvertTo16Bit(*Coord, HasSampler)) {
269b8d19947SSebastian Neubauer if (OperandIndex < ImageDimIntr->CoordStart ||
270b8d19947SSebastian Neubauer ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
271b8d19947SSebastian Neubauer return None;
272b8d19947SSebastian Neubauer }
273b8d19947SSebastian Neubauer // All gradients can be converted, so convert only them
274b8d19947SSebastian Neubauer OnlyDerivatives = true;
275b8d19947SSebastian Neubauer break;
276b8d19947SSebastian Neubauer }
277b8d19947SSebastian Neubauer
278b8d19947SSebastian Neubauer assert(OperandIndex == ImageDimIntr->GradientStart ||
279b8d19947SSebastian Neubauer FloatCoord == Coord->getType()->isFloatingPointTy());
280b8d19947SSebastian Neubauer FloatCoord = Coord->getType()->isFloatingPointTy();
281b8d19947SSebastian Neubauer }
282b8d19947SSebastian Neubauer
2830530fdbbSSebastian Neubauer if (!OnlyDerivatives && !ST->hasA16())
284b8d19947SSebastian Neubauer OnlyDerivatives = true; // Only supports G16
2850530fdbbSSebastian Neubauer
2860530fdbbSSebastian Neubauer // Check if there is a bias parameter and if it can be converted to f16
2870530fdbbSSebastian Neubauer if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
2880530fdbbSSebastian Neubauer Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
2894ed7c6eeSSebastian Neubauer assert(HasSampler &&
2904ed7c6eeSSebastian Neubauer "Only image instructions with a sampler can have a bias");
2914ed7c6eeSSebastian Neubauer if (!canSafelyConvertTo16Bit(*Bias, HasSampler))
2920530fdbbSSebastian Neubauer OnlyDerivatives = true;
293b8d19947SSebastian Neubauer }
294b8d19947SSebastian Neubauer
2950530fdbbSSebastian Neubauer if (OnlyDerivatives && (!ST->hasG16() || ImageDimIntr->GradientStart ==
2960530fdbbSSebastian Neubauer ImageDimIntr->CoordStart))
2970530fdbbSSebastian Neubauer return None;
2980530fdbbSSebastian Neubauer
299b8d19947SSebastian Neubauer Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
300b8d19947SSebastian Neubauer : Type::getInt16Ty(II.getContext());
301b8d19947SSebastian Neubauer
302603d1803SSebastian Neubauer return modifyIntrinsicCall(
3032417de27SMariusz Sikora II, II, II.getIntrinsicID(), IC, [&](auto &Args, auto &ArgTys) {
304b8d19947SSebastian Neubauer ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
3050530fdbbSSebastian Neubauer if (!OnlyDerivatives) {
306b8d19947SSebastian Neubauer ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
3070530fdbbSSebastian Neubauer
3080530fdbbSSebastian Neubauer // Change the bias type
3090530fdbbSSebastian Neubauer if (ImageDimIntr->NumBiasArgs != 0)
3100530fdbbSSebastian Neubauer ArgTys[ImageDimIntr->BiasTyArg] = Type::getHalfTy(II.getContext());
3110530fdbbSSebastian Neubauer }
312b8d19947SSebastian Neubauer
313b8d19947SSebastian Neubauer unsigned EndIndex =
314b8d19947SSebastian Neubauer OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
315b8d19947SSebastian Neubauer for (unsigned OperandIndex = ImageDimIntr->GradientStart;
316b8d19947SSebastian Neubauer OperandIndex < EndIndex; OperandIndex++) {
317b8d19947SSebastian Neubauer Args[OperandIndex] =
318b8d19947SSebastian Neubauer convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
319b8d19947SSebastian Neubauer }
320b8d19947SSebastian Neubauer
3210530fdbbSSebastian Neubauer // Convert the bias
3220530fdbbSSebastian Neubauer if (!OnlyDerivatives && ImageDimIntr->NumBiasArgs != 0) {
3230530fdbbSSebastian Neubauer Value *Bias = II.getOperand(ImageDimIntr->BiasIndex);
3240530fdbbSSebastian Neubauer Args[ImageDimIntr->BiasIndex] = convertTo16Bit(*Bias, IC.Builder);
3250530fdbbSSebastian Neubauer }
326603d1803SSebastian Neubauer });
327b8d19947SSebastian Neubauer }
328b8d19947SSebastian Neubauer
canSimplifyLegacyMulToMul(const Value * Op0,const Value * Op1,InstCombiner & IC) const329958130dfSJay Foad bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
330958130dfSJay Foad InstCombiner &IC) const {
331958130dfSJay Foad // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
332958130dfSJay Foad // infinity, gives +0.0. If we can prove we don't have one of the special
333958130dfSJay Foad // cases then we can use a normal multiply instead.
334958130dfSJay Foad // TODO: Create and use isKnownFiniteNonZero instead of just matching
335958130dfSJay Foad // constants here.
336958130dfSJay Foad if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
337958130dfSJay Foad match(Op1, PatternMatch::m_FiniteNonZero())) {
338958130dfSJay Foad // One operand is not zero or infinity or NaN.
339958130dfSJay Foad return true;
340958130dfSJay Foad }
341958130dfSJay Foad auto *TLI = &IC.getTargetLibraryInfo();
342958130dfSJay Foad if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
343958130dfSJay Foad isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
344958130dfSJay Foad // Neither operand is infinity or NaN.
345958130dfSJay Foad return true;
346958130dfSJay Foad }
347958130dfSJay Foad return false;
348958130dfSJay Foad }
349958130dfSJay Foad
3502a6c8715SSebastian Neubauer Optional<Instruction *>
instCombineIntrinsic(InstCombiner & IC,IntrinsicInst & II) const3512a6c8715SSebastian Neubauer GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
3522a6c8715SSebastian Neubauer Intrinsic::ID IID = II.getIntrinsicID();
3532a6c8715SSebastian Neubauer switch (IID) {
3542a6c8715SSebastian Neubauer case Intrinsic::amdgcn_rcp: {
3552a6c8715SSebastian Neubauer Value *Src = II.getArgOperand(0);
3562a6c8715SSebastian Neubauer
3572a6c8715SSebastian Neubauer // TODO: Move to ConstantFolding/InstSimplify?
3582a6c8715SSebastian Neubauer if (isa<UndefValue>(Src)) {
3592a6c8715SSebastian Neubauer Type *Ty = II.getType();
3602a6c8715SSebastian Neubauer auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
3612a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, QNaN);
3622a6c8715SSebastian Neubauer }
3632a6c8715SSebastian Neubauer
3642a6c8715SSebastian Neubauer if (II.isStrictFP())
3652a6c8715SSebastian Neubauer break;
3662a6c8715SSebastian Neubauer
3672a6c8715SSebastian Neubauer if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3682a6c8715SSebastian Neubauer const APFloat &ArgVal = C->getValueAPF();
3692a6c8715SSebastian Neubauer APFloat Val(ArgVal.getSemantics(), 1);
3702a6c8715SSebastian Neubauer Val.divide(ArgVal, APFloat::rmNearestTiesToEven);
3712a6c8715SSebastian Neubauer
3722a6c8715SSebastian Neubauer // This is more precise than the instruction may give.
3732a6c8715SSebastian Neubauer //
3742a6c8715SSebastian Neubauer // TODO: The instruction always flushes denormal results (except for f16),
3752a6c8715SSebastian Neubauer // should this also?
3762a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
3772a6c8715SSebastian Neubauer }
3782a6c8715SSebastian Neubauer
3792a6c8715SSebastian Neubauer break;
3802a6c8715SSebastian Neubauer }
3812a6c8715SSebastian Neubauer case Intrinsic::amdgcn_rsq: {
3822a6c8715SSebastian Neubauer Value *Src = II.getArgOperand(0);
3832a6c8715SSebastian Neubauer
3842a6c8715SSebastian Neubauer // TODO: Move to ConstantFolding/InstSimplify?
3852a6c8715SSebastian Neubauer if (isa<UndefValue>(Src)) {
3862a6c8715SSebastian Neubauer Type *Ty = II.getType();
3872a6c8715SSebastian Neubauer auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
3882a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, QNaN);
3892a6c8715SSebastian Neubauer }
3902a6c8715SSebastian Neubauer
3912a6c8715SSebastian Neubauer break;
3922a6c8715SSebastian Neubauer }
3932a6c8715SSebastian Neubauer case Intrinsic::amdgcn_frexp_mant:
3942a6c8715SSebastian Neubauer case Intrinsic::amdgcn_frexp_exp: {
3952a6c8715SSebastian Neubauer Value *Src = II.getArgOperand(0);
3962a6c8715SSebastian Neubauer if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
3972a6c8715SSebastian Neubauer int Exp;
3982a6c8715SSebastian Neubauer APFloat Significand =
3992a6c8715SSebastian Neubauer frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);
4002a6c8715SSebastian Neubauer
4012a6c8715SSebastian Neubauer if (IID == Intrinsic::amdgcn_frexp_mant) {
4022a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(
4032a6c8715SSebastian Neubauer II, ConstantFP::get(II.getContext(), Significand));
4042a6c8715SSebastian Neubauer }
4052a6c8715SSebastian Neubauer
4062a6c8715SSebastian Neubauer // Match instruction special case behavior.
4072a6c8715SSebastian Neubauer if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
4082a6c8715SSebastian Neubauer Exp = 0;
4092a6c8715SSebastian Neubauer
4102a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
4112a6c8715SSebastian Neubauer }
4122a6c8715SSebastian Neubauer
4132a6c8715SSebastian Neubauer if (isa<UndefValue>(Src)) {
4142a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
4152a6c8715SSebastian Neubauer }
4162a6c8715SSebastian Neubauer
4172a6c8715SSebastian Neubauer break;
4182a6c8715SSebastian Neubauer }
4192a6c8715SSebastian Neubauer case Intrinsic::amdgcn_class: {
4202a6c8715SSebastian Neubauer enum {
4212a6c8715SSebastian Neubauer S_NAN = 1 << 0, // Signaling NaN
4222a6c8715SSebastian Neubauer Q_NAN = 1 << 1, // Quiet NaN
4232a6c8715SSebastian Neubauer N_INFINITY = 1 << 2, // Negative infinity
4242a6c8715SSebastian Neubauer N_NORMAL = 1 << 3, // Negative normal
4252a6c8715SSebastian Neubauer N_SUBNORMAL = 1 << 4, // Negative subnormal
4262a6c8715SSebastian Neubauer N_ZERO = 1 << 5, // Negative zero
4272a6c8715SSebastian Neubauer P_ZERO = 1 << 6, // Positive zero
4282a6c8715SSebastian Neubauer P_SUBNORMAL = 1 << 7, // Positive subnormal
4292a6c8715SSebastian Neubauer P_NORMAL = 1 << 8, // Positive normal
4302a6c8715SSebastian Neubauer P_INFINITY = 1 << 9 // Positive infinity
4312a6c8715SSebastian Neubauer };
4322a6c8715SSebastian Neubauer
4332a6c8715SSebastian Neubauer const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
4342a6c8715SSebastian Neubauer N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL |
4352a6c8715SSebastian Neubauer P_NORMAL | P_INFINITY;
4362a6c8715SSebastian Neubauer
4372a6c8715SSebastian Neubauer Value *Src0 = II.getArgOperand(0);
4382a6c8715SSebastian Neubauer Value *Src1 = II.getArgOperand(1);
4392a6c8715SSebastian Neubauer const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
4402a6c8715SSebastian Neubauer if (!CMask) {
4412a6c8715SSebastian Neubauer if (isa<UndefValue>(Src0)) {
4422a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
4432a6c8715SSebastian Neubauer }
4442a6c8715SSebastian Neubauer
4452a6c8715SSebastian Neubauer if (isa<UndefValue>(Src1)) {
4462a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II,
4472a6c8715SSebastian Neubauer ConstantInt::get(II.getType(), false));
4482a6c8715SSebastian Neubauer }
4492a6c8715SSebastian Neubauer break;
4502a6c8715SSebastian Neubauer }
4512a6c8715SSebastian Neubauer
4522a6c8715SSebastian Neubauer uint32_t Mask = CMask->getZExtValue();
4532a6c8715SSebastian Neubauer
4542a6c8715SSebastian Neubauer // If all tests are made, it doesn't matter what the value is.
4552a6c8715SSebastian Neubauer if ((Mask & FullMask) == FullMask) {
4562a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
4572a6c8715SSebastian Neubauer }
4582a6c8715SSebastian Neubauer
4592a6c8715SSebastian Neubauer if ((Mask & FullMask) == 0) {
4602a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
4612a6c8715SSebastian Neubauer }
4622a6c8715SSebastian Neubauer
4632a6c8715SSebastian Neubauer if (Mask == (S_NAN | Q_NAN)) {
4642a6c8715SSebastian Neubauer // Equivalent of isnan. Replace with standard fcmp.
4652a6c8715SSebastian Neubauer Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
4662a6c8715SSebastian Neubauer FCmp->takeName(&II);
4672a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, FCmp);
4682a6c8715SSebastian Neubauer }
4692a6c8715SSebastian Neubauer
4702a6c8715SSebastian Neubauer if (Mask == (N_ZERO | P_ZERO)) {
4712a6c8715SSebastian Neubauer // Equivalent of == 0.
4722a6c8715SSebastian Neubauer Value *FCmp =
4732a6c8715SSebastian Neubauer IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));
4742a6c8715SSebastian Neubauer
4752a6c8715SSebastian Neubauer FCmp->takeName(&II);
4762a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, FCmp);
4772a6c8715SSebastian Neubauer }
4782a6c8715SSebastian Neubauer
4792a6c8715SSebastian Neubauer // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
4802a6c8715SSebastian Neubauer if (((Mask & S_NAN) || (Mask & Q_NAN)) &&
4812a6c8715SSebastian Neubauer isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) {
4822a6c8715SSebastian Neubauer return IC.replaceOperand(
4832a6c8715SSebastian Neubauer II, 1, ConstantInt::get(Src1->getType(), Mask & ~(S_NAN | Q_NAN)));
4842a6c8715SSebastian Neubauer }
4852a6c8715SSebastian Neubauer
4862a6c8715SSebastian Neubauer const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
4872a6c8715SSebastian Neubauer if (!CVal) {
4882a6c8715SSebastian Neubauer if (isa<UndefValue>(Src0)) {
4892a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
4902a6c8715SSebastian Neubauer }
4912a6c8715SSebastian Neubauer
4922a6c8715SSebastian Neubauer // Clamp mask to used bits
4932a6c8715SSebastian Neubauer if ((Mask & FullMask) != Mask) {
4942a6c8715SSebastian Neubauer CallInst *NewCall = IC.Builder.CreateCall(
4952a6c8715SSebastian Neubauer II.getCalledFunction(),
4962a6c8715SSebastian Neubauer {Src0, ConstantInt::get(Src1->getType(), Mask & FullMask)});
4972a6c8715SSebastian Neubauer
4982a6c8715SSebastian Neubauer NewCall->takeName(&II);
4992a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, NewCall);
5002a6c8715SSebastian Neubauer }
5012a6c8715SSebastian Neubauer
5022a6c8715SSebastian Neubauer break;
5032a6c8715SSebastian Neubauer }
5042a6c8715SSebastian Neubauer
5052a6c8715SSebastian Neubauer const APFloat &Val = CVal->getValueAPF();
5062a6c8715SSebastian Neubauer
5072a6c8715SSebastian Neubauer bool Result =
5082a6c8715SSebastian Neubauer ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
5092a6c8715SSebastian Neubauer ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
5102a6c8715SSebastian Neubauer ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
5112a6c8715SSebastian Neubauer ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
5122a6c8715SSebastian Neubauer ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
5132a6c8715SSebastian Neubauer ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
5142a6c8715SSebastian Neubauer ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
5152a6c8715SSebastian Neubauer ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
5162a6c8715SSebastian Neubauer ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
5172a6c8715SSebastian Neubauer ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
5182a6c8715SSebastian Neubauer
5192a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
5202a6c8715SSebastian Neubauer }
5212a6c8715SSebastian Neubauer case Intrinsic::amdgcn_cvt_pkrtz: {
5222a6c8715SSebastian Neubauer Value *Src0 = II.getArgOperand(0);
5232a6c8715SSebastian Neubauer Value *Src1 = II.getArgOperand(1);
5242a6c8715SSebastian Neubauer if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
5252a6c8715SSebastian Neubauer if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
5262a6c8715SSebastian Neubauer const fltSemantics &HalfSem =
5272a6c8715SSebastian Neubauer II.getType()->getScalarType()->getFltSemantics();
5282a6c8715SSebastian Neubauer bool LosesInfo;
5292a6c8715SSebastian Neubauer APFloat Val0 = C0->getValueAPF();
5302a6c8715SSebastian Neubauer APFloat Val1 = C1->getValueAPF();
5312a6c8715SSebastian Neubauer Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
5322a6c8715SSebastian Neubauer Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
5332a6c8715SSebastian Neubauer
5342a6c8715SSebastian Neubauer Constant *Folded =
5352a6c8715SSebastian Neubauer ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
5362a6c8715SSebastian Neubauer ConstantFP::get(II.getContext(), Val1)});
5372a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, Folded);
5382a6c8715SSebastian Neubauer }
5392a6c8715SSebastian Neubauer }
5402a6c8715SSebastian Neubauer
5412a6c8715SSebastian Neubauer if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
5422a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
5432a6c8715SSebastian Neubauer }
5442a6c8715SSebastian Neubauer
5452a6c8715SSebastian Neubauer break;
5462a6c8715SSebastian Neubauer }
5472a6c8715SSebastian Neubauer case Intrinsic::amdgcn_cvt_pknorm_i16:
5482a6c8715SSebastian Neubauer case Intrinsic::amdgcn_cvt_pknorm_u16:
5492a6c8715SSebastian Neubauer case Intrinsic::amdgcn_cvt_pk_i16:
5502a6c8715SSebastian Neubauer case Intrinsic::amdgcn_cvt_pk_u16: {
5512a6c8715SSebastian Neubauer Value *Src0 = II.getArgOperand(0);
5522a6c8715SSebastian Neubauer Value *Src1 = II.getArgOperand(1);
5532a6c8715SSebastian Neubauer
5542a6c8715SSebastian Neubauer if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
5552a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
5562a6c8715SSebastian Neubauer }
5572a6c8715SSebastian Neubauer
5582a6c8715SSebastian Neubauer break;
5592a6c8715SSebastian Neubauer }
5602a6c8715SSebastian Neubauer case Intrinsic::amdgcn_ubfe:
5612a6c8715SSebastian Neubauer case Intrinsic::amdgcn_sbfe: {
5622a6c8715SSebastian Neubauer // Decompose simple cases into standard shifts.
5632a6c8715SSebastian Neubauer Value *Src = II.getArgOperand(0);
5642a6c8715SSebastian Neubauer if (isa<UndefValue>(Src)) {
5652a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, Src);
5662a6c8715SSebastian Neubauer }
5672a6c8715SSebastian Neubauer
5682a6c8715SSebastian Neubauer unsigned Width;
5692a6c8715SSebastian Neubauer Type *Ty = II.getType();
5702a6c8715SSebastian Neubauer unsigned IntSize = Ty->getIntegerBitWidth();
5712a6c8715SSebastian Neubauer
5722a6c8715SSebastian Neubauer ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
5732a6c8715SSebastian Neubauer if (CWidth) {
5742a6c8715SSebastian Neubauer Width = CWidth->getZExtValue();
5752a6c8715SSebastian Neubauer if ((Width & (IntSize - 1)) == 0) {
5762a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
5772a6c8715SSebastian Neubauer }
5782a6c8715SSebastian Neubauer
5792a6c8715SSebastian Neubauer // Hardware ignores high bits, so remove those.
5802a6c8715SSebastian Neubauer if (Width >= IntSize) {
5812a6c8715SSebastian Neubauer return IC.replaceOperand(
5822a6c8715SSebastian Neubauer II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
5832a6c8715SSebastian Neubauer }
5842a6c8715SSebastian Neubauer }
5852a6c8715SSebastian Neubauer
5862a6c8715SSebastian Neubauer unsigned Offset;
5872a6c8715SSebastian Neubauer ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
5882a6c8715SSebastian Neubauer if (COffset) {
5892a6c8715SSebastian Neubauer Offset = COffset->getZExtValue();
5902a6c8715SSebastian Neubauer if (Offset >= IntSize) {
5912a6c8715SSebastian Neubauer return IC.replaceOperand(
5922a6c8715SSebastian Neubauer II, 1,
5932a6c8715SSebastian Neubauer ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
5942a6c8715SSebastian Neubauer }
5952a6c8715SSebastian Neubauer }
5962a6c8715SSebastian Neubauer
5972a6c8715SSebastian Neubauer bool Signed = IID == Intrinsic::amdgcn_sbfe;
5982a6c8715SSebastian Neubauer
5992a6c8715SSebastian Neubauer if (!CWidth || !COffset)
6002a6c8715SSebastian Neubauer break;
6012a6c8715SSebastian Neubauer
602dc6e8dfdSJacob Lambert // The case of Width == 0 is handled above, which makes this transformation
6032a6c8715SSebastian Neubauer // safe. If Width == 0, then the ashr and lshr instructions become poison
6042a6c8715SSebastian Neubauer // value since the shift amount would be equal to the bit size.
6052a6c8715SSebastian Neubauer assert(Width != 0);
6062a6c8715SSebastian Neubauer
6072a6c8715SSebastian Neubauer // TODO: This allows folding to undef when the hardware has specific
6082a6c8715SSebastian Neubauer // behavior?
6092a6c8715SSebastian Neubauer if (Offset + Width < IntSize) {
6102a6c8715SSebastian Neubauer Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
6112a6c8715SSebastian Neubauer Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
6122a6c8715SSebastian Neubauer : IC.Builder.CreateLShr(Shl, IntSize - Width);
6132a6c8715SSebastian Neubauer RightShift->takeName(&II);
6142a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, RightShift);
6152a6c8715SSebastian Neubauer }
6162a6c8715SSebastian Neubauer
6172a6c8715SSebastian Neubauer Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
6182a6c8715SSebastian Neubauer : IC.Builder.CreateLShr(Src, Offset);
6192a6c8715SSebastian Neubauer
6202a6c8715SSebastian Neubauer RightShift->takeName(&II);
6212a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, RightShift);
6222a6c8715SSebastian Neubauer }
6232a6c8715SSebastian Neubauer case Intrinsic::amdgcn_exp:
624*445a483bSJay Foad case Intrinsic::amdgcn_exp_row:
6252a6c8715SSebastian Neubauer case Intrinsic::amdgcn_exp_compr: {
6262a6c8715SSebastian Neubauer ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
6272a6c8715SSebastian Neubauer unsigned EnBits = En->getZExtValue();
6282a6c8715SSebastian Neubauer if (EnBits == 0xf)
6292a6c8715SSebastian Neubauer break; // All inputs enabled.
6302a6c8715SSebastian Neubauer
6312a6c8715SSebastian Neubauer bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
6322a6c8715SSebastian Neubauer bool Changed = false;
6332a6c8715SSebastian Neubauer for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
6342a6c8715SSebastian Neubauer if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
6352a6c8715SSebastian Neubauer (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
6362a6c8715SSebastian Neubauer Value *Src = II.getArgOperand(I + 2);
6372a6c8715SSebastian Neubauer if (!isa<UndefValue>(Src)) {
6382a6c8715SSebastian Neubauer IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
6392a6c8715SSebastian Neubauer Changed = true;
6402a6c8715SSebastian Neubauer }
6412a6c8715SSebastian Neubauer }
6422a6c8715SSebastian Neubauer }
6432a6c8715SSebastian Neubauer
6442a6c8715SSebastian Neubauer if (Changed) {
6452a6c8715SSebastian Neubauer return &II;
6462a6c8715SSebastian Neubauer }
6472a6c8715SSebastian Neubauer
6482a6c8715SSebastian Neubauer break;
6492a6c8715SSebastian Neubauer }
6502a6c8715SSebastian Neubauer case Intrinsic::amdgcn_fmed3: {
6512a6c8715SSebastian Neubauer // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
6522a6c8715SSebastian Neubauer // for the shader.
6532a6c8715SSebastian Neubauer
6542a6c8715SSebastian Neubauer Value *Src0 = II.getArgOperand(0);
6552a6c8715SSebastian Neubauer Value *Src1 = II.getArgOperand(1);
6562a6c8715SSebastian Neubauer Value *Src2 = II.getArgOperand(2);
6572a6c8715SSebastian Neubauer
6582a6c8715SSebastian Neubauer // Checking for NaN before canonicalization provides better fidelity when
6592a6c8715SSebastian Neubauer // mapping other operations onto fmed3 since the order of operands is
6602a6c8715SSebastian Neubauer // unchanged.
6612a6c8715SSebastian Neubauer CallInst *NewCall = nullptr;
6622a6c8715SSebastian Neubauer if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
6632a6c8715SSebastian Neubauer NewCall = IC.Builder.CreateMinNum(Src1, Src2);
6642a6c8715SSebastian Neubauer } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
6652a6c8715SSebastian Neubauer NewCall = IC.Builder.CreateMinNum(Src0, Src2);
6662a6c8715SSebastian Neubauer } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
6672a6c8715SSebastian Neubauer NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
6682a6c8715SSebastian Neubauer }
6692a6c8715SSebastian Neubauer
6702a6c8715SSebastian Neubauer if (NewCall) {
6712a6c8715SSebastian Neubauer NewCall->copyFastMathFlags(&II);
6722a6c8715SSebastian Neubauer NewCall->takeName(&II);
6732a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, NewCall);
6742a6c8715SSebastian Neubauer }
6752a6c8715SSebastian Neubauer
6762a6c8715SSebastian Neubauer bool Swap = false;
6772a6c8715SSebastian Neubauer // Canonicalize constants to RHS operands.
6782a6c8715SSebastian Neubauer //
6792a6c8715SSebastian Neubauer // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
6802a6c8715SSebastian Neubauer if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
6812a6c8715SSebastian Neubauer std::swap(Src0, Src1);
6822a6c8715SSebastian Neubauer Swap = true;
6832a6c8715SSebastian Neubauer }
6842a6c8715SSebastian Neubauer
6852a6c8715SSebastian Neubauer if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
6862a6c8715SSebastian Neubauer std::swap(Src1, Src2);
6872a6c8715SSebastian Neubauer Swap = true;
6882a6c8715SSebastian Neubauer }
6892a6c8715SSebastian Neubauer
6902a6c8715SSebastian Neubauer if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
6912a6c8715SSebastian Neubauer std::swap(Src0, Src1);
6922a6c8715SSebastian Neubauer Swap = true;
6932a6c8715SSebastian Neubauer }
6942a6c8715SSebastian Neubauer
6952a6c8715SSebastian Neubauer if (Swap) {
6962a6c8715SSebastian Neubauer II.setArgOperand(0, Src0);
6972a6c8715SSebastian Neubauer II.setArgOperand(1, Src1);
6982a6c8715SSebastian Neubauer II.setArgOperand(2, Src2);
6992a6c8715SSebastian Neubauer return &II;
7002a6c8715SSebastian Neubauer }
7012a6c8715SSebastian Neubauer
7022a6c8715SSebastian Neubauer if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
7032a6c8715SSebastian Neubauer if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
7042a6c8715SSebastian Neubauer if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
7052a6c8715SSebastian Neubauer APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
7062a6c8715SSebastian Neubauer C2->getValueAPF());
7072a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(
7082a6c8715SSebastian Neubauer II, ConstantFP::get(IC.Builder.getContext(), Result));
7092a6c8715SSebastian Neubauer }
7102a6c8715SSebastian Neubauer }
7112a6c8715SSebastian Neubauer }
7122a6c8715SSebastian Neubauer
7132a6c8715SSebastian Neubauer break;
7142a6c8715SSebastian Neubauer }
7152a6c8715SSebastian Neubauer case Intrinsic::amdgcn_icmp:
7162a6c8715SSebastian Neubauer case Intrinsic::amdgcn_fcmp: {
7172a6c8715SSebastian Neubauer const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
7182a6c8715SSebastian Neubauer // Guard against invalid arguments.
7192a6c8715SSebastian Neubauer int64_t CCVal = CC->getZExtValue();
7202a6c8715SSebastian Neubauer bool IsInteger = IID == Intrinsic::amdgcn_icmp;
7212a6c8715SSebastian Neubauer if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
7222a6c8715SSebastian Neubauer CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
7232a6c8715SSebastian Neubauer (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
7242a6c8715SSebastian Neubauer CCVal > CmpInst::LAST_FCMP_PREDICATE)))
7252a6c8715SSebastian Neubauer break;
7262a6c8715SSebastian Neubauer
7272a6c8715SSebastian Neubauer Value *Src0 = II.getArgOperand(0);
7282a6c8715SSebastian Neubauer Value *Src1 = II.getArgOperand(1);
7292a6c8715SSebastian Neubauer
7302a6c8715SSebastian Neubauer if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
7312a6c8715SSebastian Neubauer if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
7322a6c8715SSebastian Neubauer Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
7332a6c8715SSebastian Neubauer if (CCmp->isNullValue()) {
7342a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(
7352a6c8715SSebastian Neubauer II, ConstantExpr::getSExt(CCmp, II.getType()));
7362a6c8715SSebastian Neubauer }
7372a6c8715SSebastian Neubauer
7382a6c8715SSebastian Neubauer // The result of V_ICMP/V_FCMP assembly instructions (which this
7392a6c8715SSebastian Neubauer // intrinsic exposes) is one bit per thread, masked with the EXEC
7402a6c8715SSebastian Neubauer // register (which contains the bitmask of live threads). So a
7412a6c8715SSebastian Neubauer // comparison that always returns true is the same as a read of the
7422a6c8715SSebastian Neubauer // EXEC register.
7432a6c8715SSebastian Neubauer Function *NewF = Intrinsic::getDeclaration(
7442a6c8715SSebastian Neubauer II.getModule(), Intrinsic::read_register, II.getType());
7452a6c8715SSebastian Neubauer Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
7462a6c8715SSebastian Neubauer MDNode *MD = MDNode::get(II.getContext(), MDArgs);
7472a6c8715SSebastian Neubauer Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
7482a6c8715SSebastian Neubauer CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
7493f4d00bcSArthur Eubanks NewCall->addFnAttr(Attribute::Convergent);
7502a6c8715SSebastian Neubauer NewCall->takeName(&II);
7512a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, NewCall);
7522a6c8715SSebastian Neubauer }
7532a6c8715SSebastian Neubauer
7542a6c8715SSebastian Neubauer // Canonicalize constants to RHS.
7552a6c8715SSebastian Neubauer CmpInst::Predicate SwapPred =
7562a6c8715SSebastian Neubauer CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
7572a6c8715SSebastian Neubauer II.setArgOperand(0, Src1);
7582a6c8715SSebastian Neubauer II.setArgOperand(1, Src0);
7592a6c8715SSebastian Neubauer II.setArgOperand(
7602a6c8715SSebastian Neubauer 2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
7612a6c8715SSebastian Neubauer return &II;
7622a6c8715SSebastian Neubauer }
7632a6c8715SSebastian Neubauer
7642a6c8715SSebastian Neubauer if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
7652a6c8715SSebastian Neubauer break;
7662a6c8715SSebastian Neubauer
7672a6c8715SSebastian Neubauer // Canonicalize compare eq with true value to compare != 0
7682a6c8715SSebastian Neubauer // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
7692a6c8715SSebastian Neubauer // -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
7702a6c8715SSebastian Neubauer // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
7712a6c8715SSebastian Neubauer // -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
7722a6c8715SSebastian Neubauer Value *ExtSrc;
7732a6c8715SSebastian Neubauer if (CCVal == CmpInst::ICMP_EQ &&
7742a6c8715SSebastian Neubauer ((match(Src1, PatternMatch::m_One()) &&
7752a6c8715SSebastian Neubauer match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
7762a6c8715SSebastian Neubauer (match(Src1, PatternMatch::m_AllOnes()) &&
7772a6c8715SSebastian Neubauer match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
7782a6c8715SSebastian Neubauer ExtSrc->getType()->isIntegerTy(1)) {
7792a6c8715SSebastian Neubauer IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType()));
7802a6c8715SSebastian Neubauer IC.replaceOperand(II, 2,
7812a6c8715SSebastian Neubauer ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
7822a6c8715SSebastian Neubauer return &II;
7832a6c8715SSebastian Neubauer }
7842a6c8715SSebastian Neubauer
7852a6c8715SSebastian Neubauer CmpInst::Predicate SrcPred;
7862a6c8715SSebastian Neubauer Value *SrcLHS;
7872a6c8715SSebastian Neubauer Value *SrcRHS;
7882a6c8715SSebastian Neubauer
7892a6c8715SSebastian Neubauer // Fold compare eq/ne with 0 from a compare result as the predicate to the
7902a6c8715SSebastian Neubauer // intrinsic. The typical use is a wave vote function in the library, which
7912a6c8715SSebastian Neubauer // will be fed from a user code condition compared with 0. Fold in the
7922a6c8715SSebastian Neubauer // redundant compare.
7932a6c8715SSebastian Neubauer
7942a6c8715SSebastian Neubauer // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
7952a6c8715SSebastian Neubauer // -> llvm.amdgcn.[if]cmp(a, b, pred)
7962a6c8715SSebastian Neubauer //
7972a6c8715SSebastian Neubauer // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
7982a6c8715SSebastian Neubauer // -> llvm.amdgcn.[if]cmp(a, b, inv pred)
7992a6c8715SSebastian Neubauer if (match(Src1, PatternMatch::m_Zero()) &&
8002a6c8715SSebastian Neubauer match(Src0, PatternMatch::m_ZExtOrSExt(
8012a6c8715SSebastian Neubauer m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
8022a6c8715SSebastian Neubauer PatternMatch::m_Value(SrcRHS))))) {
8032a6c8715SSebastian Neubauer if (CCVal == CmpInst::ICMP_EQ)
8042a6c8715SSebastian Neubauer SrcPred = CmpInst::getInversePredicate(SrcPred);
8052a6c8715SSebastian Neubauer
8062a6c8715SSebastian Neubauer Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
8072a6c8715SSebastian Neubauer ? Intrinsic::amdgcn_fcmp
8082a6c8715SSebastian Neubauer : Intrinsic::amdgcn_icmp;
8092a6c8715SSebastian Neubauer
8102a6c8715SSebastian Neubauer Type *Ty = SrcLHS->getType();
8112a6c8715SSebastian Neubauer if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
8122a6c8715SSebastian Neubauer // Promote to next legal integer type.
8132a6c8715SSebastian Neubauer unsigned Width = CmpType->getBitWidth();
8142a6c8715SSebastian Neubauer unsigned NewWidth = Width;
8152a6c8715SSebastian Neubauer
8162a6c8715SSebastian Neubauer // Don't do anything for i1 comparisons.
8172a6c8715SSebastian Neubauer if (Width == 1)
8182a6c8715SSebastian Neubauer break;
8192a6c8715SSebastian Neubauer
8202a6c8715SSebastian Neubauer if (Width <= 16)
8212a6c8715SSebastian Neubauer NewWidth = 16;
8222a6c8715SSebastian Neubauer else if (Width <= 32)
8232a6c8715SSebastian Neubauer NewWidth = 32;
8242a6c8715SSebastian Neubauer else if (Width <= 64)
8252a6c8715SSebastian Neubauer NewWidth = 64;
8262a6c8715SSebastian Neubauer else if (Width > 64)
8272a6c8715SSebastian Neubauer break; // Can't handle this.
8282a6c8715SSebastian Neubauer
8292a6c8715SSebastian Neubauer if (Width != NewWidth) {
8302a6c8715SSebastian Neubauer IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
8312a6c8715SSebastian Neubauer if (CmpInst::isSigned(SrcPred)) {
8322a6c8715SSebastian Neubauer SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
8332a6c8715SSebastian Neubauer SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
8342a6c8715SSebastian Neubauer } else {
8352a6c8715SSebastian Neubauer SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
8362a6c8715SSebastian Neubauer SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
8372a6c8715SSebastian Neubauer }
8382a6c8715SSebastian Neubauer }
8392a6c8715SSebastian Neubauer } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
8402a6c8715SSebastian Neubauer break;
8412a6c8715SSebastian Neubauer
8422a6c8715SSebastian Neubauer Function *NewF = Intrinsic::getDeclaration(
8432a6c8715SSebastian Neubauer II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
8442a6c8715SSebastian Neubauer Value *Args[] = {SrcLHS, SrcRHS,
8452a6c8715SSebastian Neubauer ConstantInt::get(CC->getType(), SrcPred)};
8462a6c8715SSebastian Neubauer CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
8472a6c8715SSebastian Neubauer NewCall->takeName(&II);
8482a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, NewCall);
8492a6c8715SSebastian Neubauer }
8502a6c8715SSebastian Neubauer
8512a6c8715SSebastian Neubauer break;
8522a6c8715SSebastian Neubauer }
8532a6c8715SSebastian Neubauer case Intrinsic::amdgcn_ballot: {
8542a6c8715SSebastian Neubauer if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
8552a6c8715SSebastian Neubauer if (Src->isZero()) {
8562a6c8715SSebastian Neubauer // amdgcn.ballot(i1 0) is zero.
8572a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
8582a6c8715SSebastian Neubauer }
8592a6c8715SSebastian Neubauer
8602a6c8715SSebastian Neubauer if (Src->isOne()) {
8612a6c8715SSebastian Neubauer // amdgcn.ballot(i1 1) is exec.
8622a6c8715SSebastian Neubauer const char *RegName = "exec";
8632a6c8715SSebastian Neubauer if (II.getType()->isIntegerTy(32))
8642a6c8715SSebastian Neubauer RegName = "exec_lo";
8652a6c8715SSebastian Neubauer else if (!II.getType()->isIntegerTy(64))
8662a6c8715SSebastian Neubauer break;
8672a6c8715SSebastian Neubauer
8682a6c8715SSebastian Neubauer Function *NewF = Intrinsic::getDeclaration(
8692a6c8715SSebastian Neubauer II.getModule(), Intrinsic::read_register, II.getType());
8702a6c8715SSebastian Neubauer Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)};
8712a6c8715SSebastian Neubauer MDNode *MD = MDNode::get(II.getContext(), MDArgs);
8722a6c8715SSebastian Neubauer Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
8732a6c8715SSebastian Neubauer CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
8743f4d00bcSArthur Eubanks NewCall->addFnAttr(Attribute::Convergent);
8752a6c8715SSebastian Neubauer NewCall->takeName(&II);
8762a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, NewCall);
8772a6c8715SSebastian Neubauer }
8782a6c8715SSebastian Neubauer }
8792a6c8715SSebastian Neubauer break;
8802a6c8715SSebastian Neubauer }
8812a6c8715SSebastian Neubauer case Intrinsic::amdgcn_wqm_vote: {
8822a6c8715SSebastian Neubauer // wqm_vote is identity when the argument is constant.
8832a6c8715SSebastian Neubauer if (!isa<Constant>(II.getArgOperand(0)))
8842a6c8715SSebastian Neubauer break;
8852a6c8715SSebastian Neubauer
8862a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, II.getArgOperand(0));
8872a6c8715SSebastian Neubauer }
8882a6c8715SSebastian Neubauer case Intrinsic::amdgcn_kill: {
8892a6c8715SSebastian Neubauer const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
8902a6c8715SSebastian Neubauer if (!C || !C->getZExtValue())
8912a6c8715SSebastian Neubauer break;
8922a6c8715SSebastian Neubauer
8932a6c8715SSebastian Neubauer // amdgcn.kill(i1 1) is a no-op
8942a6c8715SSebastian Neubauer return IC.eraseInstFromFunction(II);
8952a6c8715SSebastian Neubauer }
8962a6c8715SSebastian Neubauer case Intrinsic::amdgcn_update_dpp: {
8972a6c8715SSebastian Neubauer Value *Old = II.getArgOperand(0);
8982a6c8715SSebastian Neubauer
8992a6c8715SSebastian Neubauer auto *BC = cast<ConstantInt>(II.getArgOperand(5));
9002a6c8715SSebastian Neubauer auto *RM = cast<ConstantInt>(II.getArgOperand(3));
9012a6c8715SSebastian Neubauer auto *BM = cast<ConstantInt>(II.getArgOperand(4));
9022a6c8715SSebastian Neubauer if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
9032a6c8715SSebastian Neubauer BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
9042a6c8715SSebastian Neubauer break;
9052a6c8715SSebastian Neubauer
9062a6c8715SSebastian Neubauer // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
9072a6c8715SSebastian Neubauer return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
9082a6c8715SSebastian Neubauer }
9092a6c8715SSebastian Neubauer case Intrinsic::amdgcn_permlane16:
9102a6c8715SSebastian Neubauer case Intrinsic::amdgcn_permlanex16: {
9112a6c8715SSebastian Neubauer // Discard vdst_in if it's not going to be read.
9122a6c8715SSebastian Neubauer Value *VDstIn = II.getArgOperand(0);
9132a6c8715SSebastian Neubauer if (isa<UndefValue>(VDstIn))
9142a6c8715SSebastian Neubauer break;
9152a6c8715SSebastian Neubauer
9162a6c8715SSebastian Neubauer ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
9172a6c8715SSebastian Neubauer ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
9182a6c8715SSebastian Neubauer if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
9192a6c8715SSebastian Neubauer break;
9202a6c8715SSebastian Neubauer
9212a6c8715SSebastian Neubauer return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
9222a6c8715SSebastian Neubauer }
923bfcfd53bSJay Foad case Intrinsic::amdgcn_permlane64:
924bfcfd53bSJay Foad // A constant value is trivially uniform.
925bfcfd53bSJay Foad if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
926bfcfd53bSJay Foad return IC.replaceInstUsesWith(II, C);
927bfcfd53bSJay Foad }
928bfcfd53bSJay Foad break;
9292a6c8715SSebastian Neubauer case Intrinsic::amdgcn_readfirstlane:
9302a6c8715SSebastian Neubauer case Intrinsic::amdgcn_readlane: {
9312a6c8715SSebastian Neubauer // A constant value is trivially uniform.
9322a6c8715SSebastian Neubauer if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
9332a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, C);
9342a6c8715SSebastian Neubauer }
9352a6c8715SSebastian Neubauer
9362a6c8715SSebastian Neubauer // The rest of these may not be safe if the exec may not be the same between
9372a6c8715SSebastian Neubauer // the def and use.
9382a6c8715SSebastian Neubauer Value *Src = II.getArgOperand(0);
9392a6c8715SSebastian Neubauer Instruction *SrcInst = dyn_cast<Instruction>(Src);
9402a6c8715SSebastian Neubauer if (SrcInst && SrcInst->getParent() != II.getParent())
9412a6c8715SSebastian Neubauer break;
9422a6c8715SSebastian Neubauer
9432a6c8715SSebastian Neubauer // readfirstlane (readfirstlane x) -> readfirstlane x
9442a6c8715SSebastian Neubauer // readlane (readfirstlane x), y -> readfirstlane x
9452a6c8715SSebastian Neubauer if (match(Src,
9462a6c8715SSebastian Neubauer PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
9472a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, Src);
9482a6c8715SSebastian Neubauer }
9492a6c8715SSebastian Neubauer
9502a6c8715SSebastian Neubauer if (IID == Intrinsic::amdgcn_readfirstlane) {
9512a6c8715SSebastian Neubauer // readfirstlane (readlane x, y) -> readlane x, y
9522a6c8715SSebastian Neubauer if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
9532a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, Src);
9542a6c8715SSebastian Neubauer }
9552a6c8715SSebastian Neubauer } else {
9562a6c8715SSebastian Neubauer // readlane (readlane x, y), y -> readlane x, y
9572a6c8715SSebastian Neubauer if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
9582a6c8715SSebastian Neubauer PatternMatch::m_Value(),
9592a6c8715SSebastian Neubauer PatternMatch::m_Specific(II.getArgOperand(1))))) {
9602a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, Src);
9612a6c8715SSebastian Neubauer }
9622a6c8715SSebastian Neubauer }
9632a6c8715SSebastian Neubauer
9642a6c8715SSebastian Neubauer break;
9652a6c8715SSebastian Neubauer }
9662a6c8715SSebastian Neubauer case Intrinsic::amdgcn_ldexp: {
9672a6c8715SSebastian Neubauer // FIXME: This doesn't introduce new instructions and belongs in
9682a6c8715SSebastian Neubauer // InstructionSimplify.
9692a6c8715SSebastian Neubauer Type *Ty = II.getType();
9702a6c8715SSebastian Neubauer Value *Op0 = II.getArgOperand(0);
9712a6c8715SSebastian Neubauer Value *Op1 = II.getArgOperand(1);
9722a6c8715SSebastian Neubauer
9732a6c8715SSebastian Neubauer // Folding undef to qnan is safe regardless of the FP mode.
9742a6c8715SSebastian Neubauer if (isa<UndefValue>(Op0)) {
9752a6c8715SSebastian Neubauer auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
9762a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, QNaN);
9772a6c8715SSebastian Neubauer }
9782a6c8715SSebastian Neubauer
9792a6c8715SSebastian Neubauer const APFloat *C = nullptr;
9802a6c8715SSebastian Neubauer match(Op0, PatternMatch::m_APFloat(C));
9812a6c8715SSebastian Neubauer
9822a6c8715SSebastian Neubauer // FIXME: Should flush denorms depending on FP mode, but that's ignored
9832a6c8715SSebastian Neubauer // everywhere else.
9842a6c8715SSebastian Neubauer //
9852a6c8715SSebastian Neubauer // These cases should be safe, even with strictfp.
9862a6c8715SSebastian Neubauer // ldexp(0.0, x) -> 0.0
9872a6c8715SSebastian Neubauer // ldexp(-0.0, x) -> -0.0
9882a6c8715SSebastian Neubauer // ldexp(inf, x) -> inf
9892a6c8715SSebastian Neubauer // ldexp(-inf, x) -> -inf
9902a6c8715SSebastian Neubauer if (C && (C->isZero() || C->isInfinity())) {
9912a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, Op0);
9922a6c8715SSebastian Neubauer }
9932a6c8715SSebastian Neubauer
9942a6c8715SSebastian Neubauer // With strictfp, be more careful about possibly needing to flush denormals
9952a6c8715SSebastian Neubauer // or not, and snan behavior depends on ieee_mode.
9962a6c8715SSebastian Neubauer if (II.isStrictFP())
9972a6c8715SSebastian Neubauer break;
9982a6c8715SSebastian Neubauer
9992a6c8715SSebastian Neubauer if (C && C->isNaN()) {
10002a6c8715SSebastian Neubauer // FIXME: We just need to make the nan quiet here, but that's unavailable
10012a6c8715SSebastian Neubauer // on APFloat, only IEEEfloat
10022a6c8715SSebastian Neubauer auto *Quieted =
10032a6c8715SSebastian Neubauer ConstantFP::get(Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven));
10042a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, Quieted);
10052a6c8715SSebastian Neubauer }
10062a6c8715SSebastian Neubauer
10072a6c8715SSebastian Neubauer // ldexp(x, 0) -> x
10082a6c8715SSebastian Neubauer // ldexp(x, undef) -> x
10092a6c8715SSebastian Neubauer if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
10102a6c8715SSebastian Neubauer return IC.replaceInstUsesWith(II, Op0);
10112a6c8715SSebastian Neubauer }
10122a6c8715SSebastian Neubauer
10132a6c8715SSebastian Neubauer break;
10142a6c8715SSebastian Neubauer }
101586a480e9SJay Foad case Intrinsic::amdgcn_fmul_legacy: {
101686a480e9SJay Foad Value *Op0 = II.getArgOperand(0);
101786a480e9SJay Foad Value *Op1 = II.getArgOperand(1);
101886a480e9SJay Foad
101986a480e9SJay Foad // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
102086a480e9SJay Foad // infinity, gives +0.0.
102186a480e9SJay Foad // TODO: Move to InstSimplify?
102286a480e9SJay Foad if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
102386a480e9SJay Foad match(Op1, PatternMatch::m_AnyZeroFP()))
102486a480e9SJay Foad return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));
102586a480e9SJay Foad
102686a480e9SJay Foad // If we can prove we don't have one of the special cases then we can use a
102786a480e9SJay Foad // normal fmul instruction instead.
1028958130dfSJay Foad if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
102986a480e9SJay Foad auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
103086a480e9SJay Foad FMul->takeName(&II);
103186a480e9SJay Foad return IC.replaceInstUsesWith(II, FMul);
103286a480e9SJay Foad }
103386a480e9SJay Foad break;
103486a480e9SJay Foad }
1035958130dfSJay Foad case Intrinsic::amdgcn_fma_legacy: {
1036958130dfSJay Foad Value *Op0 = II.getArgOperand(0);
1037958130dfSJay Foad Value *Op1 = II.getArgOperand(1);
1038958130dfSJay Foad Value *Op2 = II.getArgOperand(2);
1039958130dfSJay Foad
1040958130dfSJay Foad // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
1041958130dfSJay Foad // infinity, gives +0.0.
1042958130dfSJay Foad // TODO: Move to InstSimplify?
1043958130dfSJay Foad if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
1044958130dfSJay Foad match(Op1, PatternMatch::m_AnyZeroFP())) {
1045958130dfSJay Foad // It's tempting to just return Op2 here, but that would give the wrong
1046958130dfSJay Foad // result if Op2 was -0.0.
1047958130dfSJay Foad auto *Zero = ConstantFP::getNullValue(II.getType());
1048958130dfSJay Foad auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
1049958130dfSJay Foad FAdd->takeName(&II);
1050958130dfSJay Foad return IC.replaceInstUsesWith(II, FAdd);
1051958130dfSJay Foad }
1052958130dfSJay Foad
1053958130dfSJay Foad // If we can prove we don't have one of the special cases then we can use a
1054958130dfSJay Foad // normal fma instead.
1055958130dfSJay Foad if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
1056958130dfSJay Foad II.setCalledOperand(Intrinsic::getDeclaration(
1057958130dfSJay Foad II.getModule(), Intrinsic::fma, II.getType()));
1058958130dfSJay Foad return &II;
1059958130dfSJay Foad }
1060958130dfSJay Foad break;
1061958130dfSJay Foad }
106245f16eabSMatt Arsenault case Intrinsic::amdgcn_is_shared:
106345f16eabSMatt Arsenault case Intrinsic::amdgcn_is_private: {
106445f16eabSMatt Arsenault if (isa<UndefValue>(II.getArgOperand(0)))
106545f16eabSMatt Arsenault return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
106645f16eabSMatt Arsenault
106745f16eabSMatt Arsenault if (isa<ConstantPointerNull>(II.getArgOperand(0)))
106845f16eabSMatt Arsenault return IC.replaceInstUsesWith(II, ConstantInt::getFalse(II.getType()));
106945f16eabSMatt Arsenault break;
107045f16eabSMatt Arsenault }
1071b8d19947SSebastian Neubauer default: {
1072b8d19947SSebastian Neubauer if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
1073b8d19947SSebastian Neubauer AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
1074b8d19947SSebastian Neubauer return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
1075b8d19947SSebastian Neubauer }
1076b8d19947SSebastian Neubauer }
10772a6c8715SSebastian Neubauer }
10782a6c8715SSebastian Neubauer return None;
10792a6c8715SSebastian Neubauer }
10802a6c8715SSebastian Neubauer
10812a6c8715SSebastian Neubauer /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
10822a6c8715SSebastian Neubauer ///
10832a6c8715SSebastian Neubauer /// Note: This only supports non-TFE/LWE image intrinsic calls; those have
10842a6c8715SSebastian Neubauer /// struct returns.
simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner & IC,IntrinsicInst & II,APInt DemandedElts,int DMaskIdx=-1)1085c6f08b14SBenjamin Kramer static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
10862a6c8715SSebastian Neubauer IntrinsicInst &II,
10872a6c8715SSebastian Neubauer APInt DemandedElts,
10882a6c8715SSebastian Neubauer int DMaskIdx = -1) {
10892a6c8715SSebastian Neubauer
10903b92db4cSChristopher Tetreault auto *IIVTy = cast<FixedVectorType>(II.getType());
10912a6c8715SSebastian Neubauer unsigned VWidth = IIVTy->getNumElements();
10922a6c8715SSebastian Neubauer if (VWidth == 1)
10932a6c8715SSebastian Neubauer return nullptr;
10942a6c8715SSebastian Neubauer
10952a6c8715SSebastian Neubauer IRBuilderBase::InsertPointGuard Guard(IC.Builder);
10962a6c8715SSebastian Neubauer IC.Builder.SetInsertPoint(&II);
10972a6c8715SSebastian Neubauer
10982a6c8715SSebastian Neubauer // Assume the arguments are unchanged and later override them, if needed.
10990e219b64SKazu Hirata SmallVector<Value *, 16> Args(II.args());
11002a6c8715SSebastian Neubauer
11012a6c8715SSebastian Neubauer if (DMaskIdx < 0) {
11022a6c8715SSebastian Neubauer // Buffer case.
11032a6c8715SSebastian Neubauer
11042a6c8715SSebastian Neubauer const unsigned ActiveBits = DemandedElts.getActiveBits();
11052a6c8715SSebastian Neubauer const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
11062a6c8715SSebastian Neubauer
11072a6c8715SSebastian Neubauer // Start assuming the prefix of elements is demanded, but possibly clear
11082a6c8715SSebastian Neubauer // some other bits if there are trailing zeros (unused components at front)
11092a6c8715SSebastian Neubauer // and update offset.
11102a6c8715SSebastian Neubauer DemandedElts = (1 << ActiveBits) - 1;
11112a6c8715SSebastian Neubauer
11122a6c8715SSebastian Neubauer if (UnusedComponentsAtFront > 0) {
11132a6c8715SSebastian Neubauer static const unsigned InvalidOffsetIdx = 0xf;
11142a6c8715SSebastian Neubauer
11152a6c8715SSebastian Neubauer unsigned OffsetIdx;
11162a6c8715SSebastian Neubauer switch (II.getIntrinsicID()) {
11172a6c8715SSebastian Neubauer case Intrinsic::amdgcn_raw_buffer_load:
11182a6c8715SSebastian Neubauer OffsetIdx = 1;
11192a6c8715SSebastian Neubauer break;
11202a6c8715SSebastian Neubauer case Intrinsic::amdgcn_s_buffer_load:
11212a6c8715SSebastian Neubauer // If resulting type is vec3, there is no point in trimming the
11222a6c8715SSebastian Neubauer // load with updated offset, as the vec3 would most likely be widened to
11232a6c8715SSebastian Neubauer // vec4 anyway during lowering.
11242a6c8715SSebastian Neubauer if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
11252a6c8715SSebastian Neubauer OffsetIdx = InvalidOffsetIdx;
11262a6c8715SSebastian Neubauer else
11272a6c8715SSebastian Neubauer OffsetIdx = 1;
11282a6c8715SSebastian Neubauer break;
11292a6c8715SSebastian Neubauer case Intrinsic::amdgcn_struct_buffer_load:
11302a6c8715SSebastian Neubauer OffsetIdx = 2;
11312a6c8715SSebastian Neubauer break;
11322a6c8715SSebastian Neubauer default:
11332a6c8715SSebastian Neubauer // TODO: handle tbuffer* intrinsics.
11342a6c8715SSebastian Neubauer OffsetIdx = InvalidOffsetIdx;
11352a6c8715SSebastian Neubauer break;
11362a6c8715SSebastian Neubauer }
11372a6c8715SSebastian Neubauer
11382a6c8715SSebastian Neubauer if (OffsetIdx != InvalidOffsetIdx) {
11392a6c8715SSebastian Neubauer // Clear demanded bits and update the offset.
11402a6c8715SSebastian Neubauer DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
11412a6c8715SSebastian Neubauer auto *Offset = II.getArgOperand(OffsetIdx);
11422a6c8715SSebastian Neubauer unsigned SingleComponentSizeInBits =
11432a6c8715SSebastian Neubauer IC.getDataLayout().getTypeSizeInBits(II.getType()->getScalarType());
11442a6c8715SSebastian Neubauer unsigned OffsetAdd =
11452a6c8715SSebastian Neubauer UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
11462a6c8715SSebastian Neubauer auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
11472a6c8715SSebastian Neubauer Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
11482a6c8715SSebastian Neubauer }
11492a6c8715SSebastian Neubauer }
11502a6c8715SSebastian Neubauer } else {
11512a6c8715SSebastian Neubauer // Image case.
11522a6c8715SSebastian Neubauer
11532a6c8715SSebastian Neubauer ConstantInt *DMask = cast<ConstantInt>(II.getArgOperand(DMaskIdx));
11542a6c8715SSebastian Neubauer unsigned DMaskVal = DMask->getZExtValue() & 0xf;
11552a6c8715SSebastian Neubauer
11562a6c8715SSebastian Neubauer // Mask off values that are undefined because the dmask doesn't cover them
11572a6c8715SSebastian Neubauer DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
11582a6c8715SSebastian Neubauer
11592a6c8715SSebastian Neubauer unsigned NewDMaskVal = 0;
11602a6c8715SSebastian Neubauer unsigned OrigLoadIdx = 0;
11612a6c8715SSebastian Neubauer for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
11622a6c8715SSebastian Neubauer const unsigned Bit = 1 << SrcIdx;
11632a6c8715SSebastian Neubauer if (!!(DMaskVal & Bit)) {
11642a6c8715SSebastian Neubauer if (!!DemandedElts[OrigLoadIdx])
11652a6c8715SSebastian Neubauer NewDMaskVal |= Bit;
11662a6c8715SSebastian Neubauer OrigLoadIdx++;
11672a6c8715SSebastian Neubauer }
11682a6c8715SSebastian Neubauer }
11692a6c8715SSebastian Neubauer
11702a6c8715SSebastian Neubauer if (DMaskVal != NewDMaskVal)
11712a6c8715SSebastian Neubauer Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
11722a6c8715SSebastian Neubauer }
11732a6c8715SSebastian Neubauer
11742a6c8715SSebastian Neubauer unsigned NewNumElts = DemandedElts.countPopulation();
11752a6c8715SSebastian Neubauer if (!NewNumElts)
11762a6c8715SSebastian Neubauer return UndefValue::get(II.getType());
11772a6c8715SSebastian Neubauer
11782a6c8715SSebastian Neubauer if (NewNumElts >= VWidth && DemandedElts.isMask()) {
11792a6c8715SSebastian Neubauer if (DMaskIdx >= 0)
11802a6c8715SSebastian Neubauer II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
11812a6c8715SSebastian Neubauer return nullptr;
11822a6c8715SSebastian Neubauer }
11832a6c8715SSebastian Neubauer
11842a6c8715SSebastian Neubauer // Validate function argument and return types, extracting overloaded types
11852a6c8715SSebastian Neubauer // along the way.
11862a6c8715SSebastian Neubauer SmallVector<Type *, 6> OverloadTys;
11872a6c8715SSebastian Neubauer if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
11882a6c8715SSebastian Neubauer return nullptr;
11892a6c8715SSebastian Neubauer
11902a6c8715SSebastian Neubauer Module *M = II.getParent()->getParent()->getParent();
11912a6c8715SSebastian Neubauer Type *EltTy = IIVTy->getElementType();
11922a6c8715SSebastian Neubauer Type *NewTy =
11932a6c8715SSebastian Neubauer (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
11942a6c8715SSebastian Neubauer
11952a6c8715SSebastian Neubauer OverloadTys[0] = NewTy;
11962a6c8715SSebastian Neubauer Function *NewIntrin =
11972a6c8715SSebastian Neubauer Intrinsic::getDeclaration(M, II.getIntrinsicID(), OverloadTys);
11982a6c8715SSebastian Neubauer
11992a6c8715SSebastian Neubauer CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
12002a6c8715SSebastian Neubauer NewCall->takeName(&II);
12012a6c8715SSebastian Neubauer NewCall->copyMetadata(II);
12022a6c8715SSebastian Neubauer
12032a6c8715SSebastian Neubauer if (NewNumElts == 1) {
12042a6c8715SSebastian Neubauer return IC.Builder.CreateInsertElement(UndefValue::get(II.getType()),
12052a6c8715SSebastian Neubauer NewCall,
12062a6c8715SSebastian Neubauer DemandedElts.countTrailingZeros());
12072a6c8715SSebastian Neubauer }
12082a6c8715SSebastian Neubauer
12092a6c8715SSebastian Neubauer SmallVector<int, 8> EltMask;
12102a6c8715SSebastian Neubauer unsigned NewLoadIdx = 0;
12112a6c8715SSebastian Neubauer for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
12122a6c8715SSebastian Neubauer if (!!DemandedElts[OrigLoadIdx])
12132a6c8715SSebastian Neubauer EltMask.push_back(NewLoadIdx++);
12142a6c8715SSebastian Neubauer else
12152a6c8715SSebastian Neubauer EltMask.push_back(NewNumElts);
12162a6c8715SSebastian Neubauer }
12172a6c8715SSebastian Neubauer
12189b296102SJuneyoung Lee Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
12192a6c8715SSebastian Neubauer
12202a6c8715SSebastian Neubauer return Shuffle;
12212a6c8715SSebastian Neubauer }
12222a6c8715SSebastian Neubauer
simplifyDemandedVectorEltsIntrinsic(InstCombiner & IC,IntrinsicInst & II,APInt DemandedElts,APInt & UndefElts,APInt & UndefElts2,APInt & UndefElts3,std::function<void (Instruction *,unsigned,APInt,APInt &)> SimplifyAndSetOp) const12232a6c8715SSebastian Neubauer Optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
12242a6c8715SSebastian Neubauer InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
12252a6c8715SSebastian Neubauer APInt &UndefElts2, APInt &UndefElts3,
12262a6c8715SSebastian Neubauer std::function<void(Instruction *, unsigned, APInt, APInt &)>
12272a6c8715SSebastian Neubauer SimplifyAndSetOp) const {
12282a6c8715SSebastian Neubauer switch (II.getIntrinsicID()) {
12292a6c8715SSebastian Neubauer case Intrinsic::amdgcn_buffer_load:
12302a6c8715SSebastian Neubauer case Intrinsic::amdgcn_buffer_load_format:
12312a6c8715SSebastian Neubauer case Intrinsic::amdgcn_raw_buffer_load:
12322a6c8715SSebastian Neubauer case Intrinsic::amdgcn_raw_buffer_load_format:
12332a6c8715SSebastian Neubauer case Intrinsic::amdgcn_raw_tbuffer_load:
12342a6c8715SSebastian Neubauer case Intrinsic::amdgcn_s_buffer_load:
12352a6c8715SSebastian Neubauer case Intrinsic::amdgcn_struct_buffer_load:
12362a6c8715SSebastian Neubauer case Intrinsic::amdgcn_struct_buffer_load_format:
12372a6c8715SSebastian Neubauer case Intrinsic::amdgcn_struct_tbuffer_load:
12382a6c8715SSebastian Neubauer case Intrinsic::amdgcn_tbuffer_load:
12392a6c8715SSebastian Neubauer return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
12402a6c8715SSebastian Neubauer default: {
12412a6c8715SSebastian Neubauer if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
12422a6c8715SSebastian Neubauer return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
12432a6c8715SSebastian Neubauer }
12442a6c8715SSebastian Neubauer break;
12452a6c8715SSebastian Neubauer }
12462a6c8715SSebastian Neubauer }
12472a6c8715SSebastian Neubauer return None;
12482a6c8715SSebastian Neubauer }
1249