//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUCombinerHelper.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/Target/TargetMachine.h"

#define GET_GICOMBINER_DEPS
32 #include "AMDGPUGenPreLegalizeGICombiner.inc"
#undef GET_GICOMBINER_DEPS

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

namespace {
#define GET_GICOMBINER_TYPES
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_TYPES

class AMDGPUPostLegalizerCombinerImpl : public Combiner {
protected:
  const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig;
  const GCNSubtarget &STI;
  const SIInstrInfo &TII;
  // TODO: Make CombinerHelper methods const.
  mutable AMDGPUCombinerHelper Helper;

public:
  AMDGPUPostLegalizerCombinerImpl(
      MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
      GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
      const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
      const GCNSubtarget &STI, MachineDominatorTree *MDT,
      const LegalizerInfo *LI);

  static const char *getName() { return "AMDGPUPostLegalizerCombinerImpl"; }

  bool tryCombineAllImpl(MachineInstr &I) const;
  bool tryCombineAll(MachineInstr &I) const override;

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    Register True;
    Register False;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info) const;
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                         const FMinFMaxLegacyInfo &Info) const;

  bool matchUCharToFloat(MachineInstr &MI) const;
  void applyUCharToFloat(MachineInstr &MI) const;

  bool
  matchRcpSqrtToRsq(MachineInstr &MI,
                    std::function<void(MachineIRBuilder &)> &MatchInfo) const;

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI,
                         CvtF32UByteMatchInfo &MatchInfo) const;
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo) const;

  bool matchRemoveFcanonicalize(MachineInstr &MI, Register &Reg) const;

  // Combine unsigned buffer load and sign extension instructions to generate
  // signed buffer load instructions.
  bool matchCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;
  void applyCombineSignExtendInReg(
      MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchInfo) const;

  // Find the s_mul_u64 instructions where the higher bits are either
  // zero-extended or sign-extended.
  bool matchCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;
  // Replace the s_mul_u64 instructions with S_MUL_I64_I32_PSEUDO if the higher
  // 33 bits are sign extended and with S_MUL_U64_U32_PSEUDO if the higher 32
  // bits are zero extended.
  void applyCombine_s_mul_u64(MachineInstr &MI, unsigned &NewOpcode) const;

private:
#define GET_GICOMBINER_CLASS_MEMBERS
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CLASS_MEMBERS
#undef AMDGPUSubtarget
};

#define GET_GICOMBINER_IMPL
#define AMDGPUSubtarget GCNSubtarget
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUSubtarget
#undef GET_GICOMBINER_IMPL

AMDGPUPostLegalizerCombinerImpl::AMDGPUPostLegalizerCombinerImpl(
    MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC,
    GISelKnownBits &KB, GISelCSEInfo *CSEInfo,
    const AMDGPUPostLegalizerCombinerImplRuleConfig &RuleConfig,
    const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI)
    : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI),
      TII(*STI.getInstrInfo()),
      Helper(Observer, B, /*IsPreLegalize*/ false, &KB, MDT, LI),
#define GET_GICOMBINER_CONSTRUCTOR_INITS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef GET_GICOMBINER_CONSTRUCTOR_INITS
{
}

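// Beyond the generated combines, handle a few cases by hand. For the shift
// case: a 64-bit shift by a constant amount >= 32 can instead operate on the
// 32-bit halves, e.g. (sketch):
//   %r:_(s64) = G_LSHR %x:_(s64), C   ; C >= 32
// becomes an unmerge of %x, a 32-bit shift of the high half by (C - 32), and
// a re-merge with a zeroed high half.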
bool AMDGPUPostLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
  if (tryCombineAllImpl(MI))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

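// Match a G_SELECT whose condition is a single-use G_FCMP of the two selected
// values, e.g. (roughly):
//   %c:_(s1) = G_FCMP floatpred(olt), %a(s32), %b
//   %r:_(s32) = G_SELECT %c(s1), %a, %b
// so the select can be rewritten as G_AMDGPU_FMIN_LEGACY/G_AMDGPU_FMAX_LEGACY
// with the operand order chosen to preserve the legacy NaN behavior.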
bool AMDGPUPostLegalizerCombinerImpl::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) const {
  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  // TODO: Handle the case where the selected value is an fneg and the compared
  // constant is the negation of the selected value.
  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

void AMDGPUPostLegalizerCombinerImpl::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) const {
  B.setInstrAndDebugLoc(MI);
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with
    // NaN, so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}

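// Match an integer-to-FP conversion whose source is provably within [0, 255],
// e.g. (roughly):
//   %f:_(s32) = G_UITOFP %x:_(s32)   ; upper 24 bits of %x known zero
// which can be lowered to the byte-0 variant of the hardware conversion:
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %x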
bool AMDGPUPostLegalizerCombinerImpl::matchUCharToFloat(
    MachineInstr &MI) const {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyUCharToFloat(
    MachineInstr &MI) const {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

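// Fold a contractable rcp/sqrt pair into a single rsq intrinsic, in either
// order (sketch):
//   rcp(sqrt(x)) -> rsq(x)
//   sqrt(rcp(x)) -> rsq(x)
// Both instructions must carry the contract fast-math flag for the match to
// fire.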
bool AMDGPUPostLegalizerCombinerImpl::matchRcpSqrtToRsq(
    MachineInstr &MI,
    std::function<void(MachineIRBuilder &)> &MatchInfo) const {
  auto getRcpSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;

    if (auto *GI = dyn_cast<GIntrinsic>(&MI)) {
      if (GI->is(Intrinsic::amdgcn_rcp))
        return MRI.getVRegDef(MI.getOperand(2).getReg());
    }
    return nullptr;
  };

  auto getSqrtSrc = [=](const MachineInstr &MI) -> MachineInstr * {
    if (!MI.getFlag(MachineInstr::FmContract))
      return nullptr;
    MachineInstr *SqrtSrcMI = nullptr;
    auto Match =
        mi_match(MI.getOperand(0).getReg(), MRI, m_GFSqrt(m_MInstr(SqrtSrcMI)));
    (void)Match;
    return SqrtSrcMI;
  };

  MachineInstr *RcpSrcMI = nullptr, *SqrtSrcMI = nullptr;
  // rcp(sqrt(x))
  if ((RcpSrcMI = getRcpSrc(MI)) && (SqrtSrcMI = getSqrtSrc(*RcpSrcMI))) {
    MatchInfo = [SqrtSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(SqrtSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }

  // sqrt(rcp(x))
  if ((SqrtSrcMI = getSqrtSrc(MI)) && (RcpSrcMI = getRcpSrc(*SqrtSrcMI))) {
    MatchInfo = [RcpSrcMI, &MI](MachineIRBuilder &B) {
      B.buildIntrinsic(Intrinsic::amdgcn_rsq, {MI.getOperand(0)})
          .addUse(RcpSrcMI->getOperand(0).getReg())
          .setMIFlags(MI.getFlags());
    };
    return true;
  }
  return false;
}

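// Fold a constant byte shift of the source into the byte index of the
// conversion, e.g. (roughly):
//   %s:_(s32) = G_LSHR %x:_(s32), 8
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %s
// becomes
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE1 %x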
bool AMDGPUPostLegalizerCombinerImpl::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) const {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  bool IsShr = mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) const {
  B.setInstrAndDebugLoc(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

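// A G_FCANONICALIZE is redundant when SITargetLowering::isCanonicalized can
// prove its source is already canonical; the result can then simply be
// replaced by the source register.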
bool AMDGPUPostLegalizerCombinerImpl::matchRemoveFcanonicalize(
    MachineInstr &MI, Register &Reg) const {
  const SITargetLowering *TLI = static_cast<const SITargetLowering *>(
      MF.getSubtarget().getTargetLowering());
  Reg = MI.getOperand(1).getReg();
  return TLI->isCanonicalized(Reg, MF);
}

// The buffer_load_{i8, i16} intrinsics are initially lowered as
// buffer_load_{u8, u16} instructions. Here, the buffer_load_{u8, u16}
// instructions are combined with sign extension instructions in order to
// generate buffer_load_{i8, i16} instructions.

// Identify buffer_load_{u8, u16}.
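// E.g. (roughly):
//   %v:_(s32) = G_AMDGPU_BUFFER_LOAD_UBYTE ...
//   %r:_(s32) = G_SEXT_INREG %v, 8
// is rewritten so a single G_AMDGPU_BUFFER_LOAD_SBYTE defines %r directly.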
bool AMDGPUPostLegalizerCombinerImpl::matchCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  Register LoadReg = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(LoadReg))
    return false;

  // Check if the first operand of the sign extension is a subword buffer load
  // instruction.
  MachineInstr *LoadMI = MRI.getVRegDef(LoadReg);
  int64_t Width = MI.getOperand(2).getImm();
  switch (LoadMI->getOpcode()) {
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_BUFFER_LOAD_SSHORT};
    return Width == 16;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_UBYTE:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SBYTE};
    return Width == 8;
  case AMDGPU::G_AMDGPU_S_BUFFER_LOAD_USHORT:
    MatchData = {LoadMI, AMDGPU::G_AMDGPU_S_BUFFER_LOAD_SSHORT};
    return Width == 16;
  }
  return false;
}

// Combine buffer_load_{u8, u16} and the sign extension instruction to generate
// buffer_load_{i8, i16}.
void AMDGPUPostLegalizerCombinerImpl::applyCombineSignExtendInReg(
    MachineInstr &MI, std::pair<MachineInstr *, unsigned> &MatchData) const {
  auto [LoadMI, NewOpcode] = MatchData;
  LoadMI->setDesc(TII.get(NewOpcode));
  // Update the destination register of the load with the destination register
  // of the sign extension.
  Register SignExtendInsnDst = MI.getOperand(0).getReg();
  LoadMI->getOperand(0).setReg(SignExtendInsnDst);
  // Remove the sign extension.
  MI.eraseFromParent();
}

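// E.g. (roughly): if known-bits analysis shows both 64-bit operands of the
// multiply have at least 32 leading zeros, the full 64 x 64 -> 64 multiply
// can be done as a 32 x 32 -> 64 multiply:
//   G_MUL %a:_(s64), %b:_(s64)  ->  G_AMDGPU_S_MUL_U64_U32 %a, %b
// and similarly for >= 33 sign bits with G_AMDGPU_S_MUL_I64_I32.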
bool AMDGPUPostLegalizerCombinerImpl::matchCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Register Src0 = MI.getOperand(1).getReg();
  Register Src1 = MI.getOperand(2).getReg();
  if (MRI.getType(Src0) != LLT::scalar(64))
    return false;

  if (KB->getKnownBits(Src1).countMinLeadingZeros() >= 32 &&
      KB->getKnownBits(Src0).countMinLeadingZeros() >= 32) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_U64_U32;
    return true;
  }

  if (KB->computeNumSignBits(Src1) >= 33 &&
      KB->computeNumSignBits(Src0) >= 33) {
    NewOpcode = AMDGPU::G_AMDGPU_S_MUL_I64_I32;
    return true;
  }
  return false;
}

void AMDGPUPostLegalizerCombinerImpl::applyCombine_s_mul_u64(
    MachineInstr &MI, unsigned &NewOpcode) const {
  Helper.replaceOpcodeWith(MI, NewOpcode);
}

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
  AMDGPUPostLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());

  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();

  CombinerInfo CInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     LI, EnableOpt, F.hasOptSize(), F.hasMinSize());

  AMDGPUPostLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, /*CSEInfo*/ nullptr,
                                       RuleConfig, ST, MDT, LI);
  return Impl.combineMachineInstrs();
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm
548