//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

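// Helper implementing the AMDGPU-specific combines that run after the
// legalizer. The match/apply entry points below are invoked through the
// TableGen-generated rules included from AMDGPUGenPostLegalizeGICombiner.inc.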
class AMDGPUPostLegalizerCombinerHelper {
protected:
  MachineIRBuilder &B;
  MachineFunction &MF;
  MachineRegisterInfo &MRI;
  CombinerHelper &Helper;

public:
  AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
      : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper) {}

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    Register True;
    Register False;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                         const FMinFMaxLegacyInfo &Info);

  bool matchUCharToFloat(MachineInstr &MI);
  void applyUCharToFloat(MachineInstr &MI);

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo);

  struct ClampI64ToI16MatchInfo {
    int64_t Cmp1;
    int64_t Cmp2;
    Register Origin;
  };

  bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineFunction &MF,
                          ClampI64ToI16MatchInfo &MatchInfo);

  void applyClampI64ToI16(MachineInstr &MI,
                          const ClampI64ToI16MatchInfo &MatchInfo);
};

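// For illustration, one shape of MIR this combine is intended to catch on
// subtargets with fmin/fmax_legacy (register names are made up):
//   %cond:_(s1) = G_FCMP floatpred(ult), %a:_(s32), %b:_(s32)
//   %sel:_(s32) = G_SELECT %cond(s1), %a, %b
// which, for this predicate and operand order, is rewritten as
//   %sel:_(s32) = G_AMDGPU_FMIN_LEGACY %b, %a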
bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
  // FIXME: Combines should have subtarget predicates, and we shouldn't need
  // this here.
  if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
    return false;

  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
  B.setInstrAndDebugLoc(MI);
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with NaN,
    // so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}

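// For illustration, an integer-to-float conversion whose source is known to
// have only its low 8 bits possibly set (register names are made up):
//   %byte:_(s32) = G_AND %x:_(s32), %k255
//   %cvt:_(s32) = G_UITOFP %byte(s32)
// can be converted by this combine into
//   %cvt:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %byte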
bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

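// For illustration, a constant byte-extracting shift feeding one of the UBYTE
// conversions (register names are made up; %k16 is the constant 16):
//   %shift:_(s32) = G_LSHR %x:_(s32), %k16
//   %cvt:_(s32) = G_AMDGPU_CVT_F32_UBYTE0 %shift(s32)
// folds into a conversion of a higher byte of the unshifted value:
//   %cvt:_(s32) = G_AMDGPU_CVT_F32_UBYTE2 %x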
bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
  B.setInstrAndDebugLoc(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

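// For illustration, roughly the kind of 64-bit clamp sequence this matcher
// looks for (register names are made up; the limits may also lie strictly
// inside the i16 range). The value operand is the 'true' input of each select
// and the clamp constant is the 'false' input:
//   %max:_(s64) = G_CONSTANT i64 32767
//   %min:_(s64) = G_CONSTANT i64 -32768
//   %c0:_(s1) = G_ICMP intpred(slt), %x:_(s64), %max
//   %s0:_(s64) = G_SELECT %c0(s1), %x, %max
//   %c1:_(s1) = G_ICMP intpred(sgt), %s0(s64), %min
//   %s1:_(s64) = G_SELECT %c1(s1), %s0, %min
//   %r:_(s16) = G_TRUNC %s1(s64)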
bool AMDGPUPostLegalizerCombinerHelper::matchClampI64ToI16(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF,
    ClampI64ToI16MatchInfo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");

  // We only want to match a 64-bit value that gets clamped to the 16-bit
  // boundaries (or a narrower range).
  const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
  if (SrcType != LLT::scalar(64))
    return false;

  LLVM_DEBUG(dbgs() << "Matching Clamp i64 to i16\n");

  CmpInst::Predicate Predicate1;
  Register Base;

  if (!mi_match(MI.getOperand(1).getReg(), MRI,
                m_GISelect(m_GICmp(m_Pred(Predicate1), m_Reg(), m_Reg()),
                           m_Reg(Base), m_ICst(MatchInfo.Cmp1))))
    return false;

  CmpInst::Predicate Predicate2;

  if (!mi_match(Base, MRI,
                m_GISelect(m_GICmp(m_Pred(Predicate2), m_Reg(), m_Reg()),
                           m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2))))
    return false;

  if ((Predicate1 == CmpInst::ICMP_SLT && Predicate2 == CmpInst::ICMP_SGT) ||
      (Predicate1 == CmpInst::ICMP_SGT && Predicate2 == CmpInst::ICMP_SLT)) {
    const int64_t Cmp1 = MatchInfo.Cmp1;
    const int64_t Cmp2 = MatchInfo.Cmp2;
    const int64_t Diff = std::abs(Cmp2 - Cmp1);

    // If the range is trivial, there is nothing to clamp.
    if (Diff == 0 || Diff == 1)
      return false;

    const int64_t Min = std::numeric_limits<int16_t>::min();
    const int64_t Max = std::numeric_limits<int16_t>::max();

    // Check that we are really clamping against the 16-bit boundaries (or a
    // range contained within them).
    return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
            (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
  }

  return false;
}

// We want to find the combination of instructions that is generated when an
// i64 value gets clamped to i16. The matched pattern is:
//   G_SELECT MIN/MAX, G_ICMP, G_SELECT MIN/MAX, G_ICMP, G_TRUNC.
// This can be written more efficiently as:
//   v_cvt_pk_i16_i32 v0, v0, v1
//   v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
void AMDGPUPostLegalizerCombinerHelper::applyClampI64ToI16(
    MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) {
  LLVM_DEBUG(dbgs() << "Combining MI\n");

  MachineIRBuilder B(MI);
  MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();

  Register Src = MatchInfo.Origin;
  assert(MRI.getType(Src) == LLT::scalar(64));
  const LLT S32 = LLT::scalar(32);

  auto Unmerge = B.buildUnmerge(S32, Src);
  Register Hi32 = Unmerge->getOperand(0).getReg();
  Register Lo32 = Unmerge->getOperand(1).getReg();
  MRI.setRegClass(Hi32, &AMDGPU::VGPR_32RegClass);
  MRI.setRegClass(Lo32, &AMDGPU::VGPR_32RegClass);

  constexpr unsigned CvtOpcode = AMDGPU::V_CVT_PK_I16_I32_e64;
  assert(MI.getOpcode() != CvtOpcode);

  const auto *RC = &AMDGPU::VGPR_32RegClass;

  Register CvtDst = MRI.createVirtualRegister(RC);
  MRI.setType(CvtDst, S32);

  auto CvtPk = B.buildInstr(CvtOpcode);
  CvtPk.addDef(CvtDst);
  CvtPk.addReg(Hi32);
  CvtPk.addReg(Lo32);
  CvtPk.setMIFlags(MI.getFlags());

  const int64_t Min = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
  const int64_t Max = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);

  Register MinBoundaryDst = MRI.createVirtualRegister(RC);
  MRI.setType(MinBoundaryDst, S32);
  B.buildConstant(MinBoundaryDst, Min);

  Register MaxBoundaryDst = MRI.createVirtualRegister(RC);
  MRI.setType(MaxBoundaryDst, S32);
  B.buildConstant(MaxBoundaryDst, Max);

  Register MedDst = MRI.createVirtualRegister(RC);
  MRI.setType(MedDst, S32);

  auto Med = B.buildInstr(AMDGPU::V_MED3_I32);
  Med.addDef(MedDst);
  Med.addReg(MinBoundaryDst);
  Med.addReg(CvtDst);
  Med.addReg(MaxBoundaryDst);
  Med.setMIFlags(MI.getFlags());

  B.buildCopy(MI.getOperand(0).getReg(), MedDst);

  MI.eraseFromParent();
}

class AMDGPUPostLegalizerCombinerHelperState {
protected:
  CombinerHelper &Helper;
  AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;

public:
  AMDGPUPostLegalizerCombinerHelperState(
      CombinerHelper &Helper,
      AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
      : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
};

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;

public:
  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

  AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  const AMDGPULegalizerInfo *LI,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};

bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
  AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
  AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
                                                 PostLegalizerHelper);

  if (Generated.tryCombineAll(Observer, MI, B))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), LI, KB, MDT);
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm