//=== llvm/lib/Target/AMDGPU/AMDGPUPostLegalizerCombiner.cpp --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

struct FMinFMaxLegacyInfo {
  Register LHS;
  Register RHS;
  Register True;
  Register False;
  CmpInst::Predicate Pred;
};

// TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
                                MachineFunction &MF, FMinFMaxLegacyInfo &Info) {
  // FIXME: Combines should have subtarget predicates, and we shouldn't need
  // this here.
  if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
    return false;

  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  // The select must choose between the two compare operands.
  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  // Only the relational predicates map onto a legacy min/max; reject equality
  // and ordered/unordered-only tests.
  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}
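
// Illustrative example of the pattern the match above looks for (a
// hypothetical MIR sketch, not taken from a real test): a G_SELECT whose
// condition is a single-use G_FCMP over the same two values the select
// chooses between, e.g.
//
//   %cmp:_(s1) = G_FCMP floatpred(olt), %x:_(s32), %y:_(s32)
//   %res:_(s32) = G_SELECT %cmp(s1), %x, %y
//
// which the apply function below rewrites to
//
//   %res:_(s32) = G_AMDGPU_FMIN_LEGACY %x, %y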

static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                              const FMinFMaxLegacyInfo &Info) {
  auto buildNewInst = [&MI](unsigned Opc, Register X, Register Y) {
    MachineIRBuilder MIB(MI);
    MIB.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior: the
    // hardware compare fails on a NaN input and selects the second operand,
    // so order the operands to match what the original select would have
    // produced.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}
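
// Worked example of the operand permutation above (a hypothetical sketch,
// assuming the DX9-style legacy semantics where both min and max return their
// second source operand when the compare fails on a NaN input): for
//
//   %cmp:_(s1) = G_FCMP floatpred(ult), %x:_(s32), %y:_(s32)
//   %res:_(s32) = G_SELECT %cmp(s1), %x, %y
//
// the select yields %x when either input is NaN, so the combine emits
//
//   %res:_(s32) = G_AMDGPU_FMIN_LEGACY %y, %x
//
// placing %x in the second slot to preserve the NaN result.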

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

class AMDGPUPostLegalizerCombinerInfo : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;

public:
  AMDGPUGenPostLegalizerCombinerHelper Generated;

  AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  const AMDGPULegalizerInfo *LI,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!Generated.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};

bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT);

  if (Generated.tryCombineAll(Observer, MI, B, Helper))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, a 64-bit shift is a quarter-rate instruction. In the
    // common case, splitting it into a move and a 32-bit shift is faster and
    // the same code size.
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}
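
// Rough sketch of the shift narrowing (hypothetical MIR, not from a real
// test): a 64-bit left shift by a constant of 32 or more only produces bits
// in the high half, so asking the helper for 32-bit shifts can turn
//
//   %c:_(s32) = G_CONSTANT i32 33
//   %sh:_(s64) = G_SHL %x:_(s64), %c(s32)
//
// into something like
//
//   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x
//   %one:_(s32) = G_CONSTANT i32 1
//   %newhi:_(s32) = G_SHL %lo, %one(s32)
//   %zero:_(s32) = G_CONSTANT i32 0
//   %sh:_(s64) = G_MERGE_VALUES %zero(s32), %newhi(s32)
//
// trading the slow 64-bit shift for a full-rate 32-bit one.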

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), LI, KB, MDT);
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm