//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

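// AMDGPU-specific combines on generic machine instructions, run after the
// legalizer. The match/apply entry points below are invoked through the
// TableGen-generated combiner rules included further down in this file.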
class AMDGPUPostLegalizerCombinerHelper {
protected:
  MachineIRBuilder &B;
  MachineFunction &MF;
  MachineRegisterInfo &MRI;
  CombinerHelper &Helper;

public:
  AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
      : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper) {}

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    Register True;
    Register False;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                         const FMinFMaxLegacyInfo &Info);

  bool matchUCharToFloat(MachineInstr &MI);
  void applyUCharToFloat(MachineInstr &MI);

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo);

  struct ClampI64ToI16MatchInfo {
    int64_t Cmp1;
    int64_t Cmp2;
    Register Origin;
  };

  bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineFunction &MF,
                          ClampI64ToI16MatchInfo &MatchInfo);

  void applyClampI64ToI16(MachineInstr &MI,
                          const ClampI64ToI16MatchInfo &MatchInfo);
};

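// Fold a select of a floating-point compare into the target's legacy min/max
// operations. Illustrative gMIR (register names arbitrary):
//   %c:_(s1) = G_FCMP floatpred(olt), %x(s32), %y(s32)
//   %s:_(s32) = G_SELECT %c(s1), %x, %y
// -->
//   %s:_(s32) = G_AMDGPU_FMIN_LEGACY %x, %y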
bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
  // FIXME: Combines should have subtarget predicates, and we shouldn't need
  // this here.
  if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
    return false;

  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
  B.setInstrAndDebugLoc(MI);
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with NaN,
    // so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}

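// Fold an integer-to-FP conversion whose source has all bits above the low
// byte known to be zero into the byte-wise hardware conversion.
// Illustrative gMIR (register names arbitrary; bits [31:8] of %x known zero):
//   %f:_(s32) = G_UITOFP %x(s32)
// -->
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %x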
bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

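// Fold a constant byte shift feeding a G_AMDGPU_CVT_F32_UBYTEn into the byte
// index of the conversion itself, looking through a zero-extend if present.
// Illustrative gMIR (register names arbitrary):
//   %sixteen:_(s32) = G_CONSTANT i32 16
//   %shift:_(s32) = G_LSHR %x(s32), %sixteen(s32)
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift(s32)
// -->
//   %f:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %x(s32)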
bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
  B.setInstrAndDebugLoc(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerHelper::matchClampI64ToI16(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF,
    ClampI64ToI16MatchInfo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");

  // We want to check whether a 64-bit value gets clamped to 16-bit boundaries
  // (or a narrower range).
  const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
  if (SrcType != LLT::scalar(64))
    return false;

  LLVM_DEBUG(dbgs() << "Matching Clamp i64 to i16\n");

  CmpInst::Predicate Predicate1;
  Register Base;
  if (!mi_match(MI.getOperand(1).getReg(), MRI,
                m_GISelect(m_GICmp(m_Pred(Predicate1), m_Reg(), m_Reg()),
                           m_Reg(Base), m_ICst(MatchInfo.Cmp1))))
    return false;

  CmpInst::Predicate Predicate2;
  if (!mi_match(Base, MRI,
                m_GISelect(m_GICmp(m_Pred(Predicate2), m_Reg(), m_Reg()),
                           m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2))))
    return false;

  if ((Predicate1 == CmpInst::ICMP_SLT && Predicate2 == CmpInst::ICMP_SGT) ||
      (Predicate1 == CmpInst::ICMP_SGT && Predicate2 == CmpInst::ICMP_SLT)) {
    const int64_t Cmp1 = MatchInfo.Cmp1;
    const int64_t Cmp2 = MatchInfo.Cmp2;
    const int64_t Diff = std::abs(Cmp2 - Cmp1);

    // We don't need to clamp here.
    if (Diff == 0 || Diff == 1)
      return false;

    const int64_t Min = std::numeric_limits<int16_t>::min();
    const int64_t Max = std::numeric_limits<int16_t>::max();

    // Are we really trying to clamp against the signed 16-bit boundaries?
    return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
            (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
  }

  return false;
}

// We want to find the combination of instructions that is generated when an
// i64 value gets clamped to i16. The corresponding pattern is:
//   G_SELECT MIN/MAX, G_ICMP, G_SELECT MIN/MAX, G_ICMP, G_TRUNC.
// This can be written more efficiently as:
//   v_cvt_pk_i16_i32 v0, v0, v1
//   v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
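//
// Illustrative gMIR input (register names and the order of the two compares
// are arbitrary; the match only requires the nested select-of-icmp structure
// checked in matchClampI64ToI16):
//   %min:_(s64) = G_CONSTANT i64 -32768
//   %max:_(s64) = G_CONSTANT i64 32767
//   %cmp_lo:_(s1) = G_ICMP intpred(sgt), %x(s64), %min
//   %sel_lo:_(s64) = G_SELECT %cmp_lo(s1), %x, %min
//   %cmp_hi:_(s1) = G_ICMP intpred(slt), %sel_lo(s64), %max
//   %sel_hi:_(s64) = G_SELECT %cmp_hi(s1), %sel_lo, %max
//   %res:_(s16) = G_TRUNC %sel_hi(s64)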
void AMDGPUPostLegalizerCombinerHelper::applyClampI64ToI16(
    MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) {
  LLVM_DEBUG(dbgs() << "Combining MI\n");

  B.setInstrAndDebugLoc(MI);

  Register Src = MatchInfo.Origin;
  assert(MRI.getType(Src) == LLT::scalar(64));
  const LLT S32 = LLT::scalar(32);

  // G_UNMERGE_VALUES produces the low half first.
  auto Unmerge = B.buildUnmerge(S32, Src);
  Register Lo32 = Unmerge->getOperand(0).getReg();
  Register Hi32 = Unmerge->getOperand(1).getReg();
  MRI.setRegClass(Lo32, &AMDGPU::VGPR_32RegClass);
  MRI.setRegClass(Hi32, &AMDGPU::VGPR_32RegClass);

  constexpr unsigned CvtOpcode = AMDGPU::V_CVT_PK_I16_I32_e64;
  assert(MI.getOpcode() != CvtOpcode);

  const TargetRegisterClass *RC = &AMDGPU::VGPR_32RegClass;

  Register CvtDst = MRI.createVirtualRegister(RC);
  MRI.setType(CvtDst, S32);

  auto CvtPk = B.buildInstr(CvtOpcode);
  CvtPk.addDef(CvtDst);
  CvtPk.addReg(Lo32);
  CvtPk.addReg(Hi32);
  CvtPk.setMIFlags(MI.getFlags());

  const int64_t Min = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
  const int64_t Max = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);

  Register MinBoundaryDst = MRI.createVirtualRegister(RC);
  MRI.setType(MinBoundaryDst, S32);
  B.buildConstant(MinBoundaryDst, Min);

  Register MaxBoundaryDst = MRI.createVirtualRegister(RC);
  MRI.setType(MaxBoundaryDst, S32);
  B.buildConstant(MaxBoundaryDst, Max);

  Register MedDst = MRI.createVirtualRegister(RC);
  MRI.setType(MedDst, S32);

  auto Med = B.buildInstr(AMDGPU::V_MED3_I32);
  Med.addDef(MedDst);
  Med.addReg(MinBoundaryDst);
  Med.addReg(CvtDst);
  Med.addReg(MaxBoundaryDst);
  Med.setMIFlags(MI.getFlags());

  B.buildCopy(MI.getOperand(0).getReg(), MedDst);

  MI.eraseFromParent();
}

class AMDGPUPostLegalizerCombinerHelperState {
protected:
  CombinerHelper &Helper;
  AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;

public:
  AMDGPUPostLegalizerCombinerHelperState(
      CombinerHelper &Helper,
      AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
      : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
};

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;

public:
  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

  AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  const AMDGPULegalizerInfo *LI,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};

bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
  AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
  AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
                                                 PostLegalizerHelper);

  if (Generated.tryCombineAll(Observer, MI, B))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), LI, KB, MDT);
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm