1 //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // before the legalizer.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPUTargetMachine.h"
15 #include "llvm/CodeGen/GlobalISel/Combiner.h"
16 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
17 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
18 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
19 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
20 #include "llvm/CodeGen/MachineDominators.h"
21 #include "llvm/CodeGen/MachineFunctionPass.h"
22 #include "llvm/CodeGen/TargetPassConfig.h"
23 #include "llvm/Support/Debug.h"
24 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
25 
26 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
27 
28 using namespace llvm;
29 using namespace MIPatternMatch;
30 
31 struct FMinFMaxLegacyInfo {
32   Register LHS;
33   Register RHS;
34   Register True;
35   Register False;
36   CmpInst::Predicate Pred;
37 };
38 
39 // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
40 static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI,
41                                 MachineFunction &MF, FMinFMaxLegacyInfo &Info) {
42   // FIXME: Combines should have subtarget predicates, and we shouldn't need
43   // this here.
44   if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
45     return false;
46 
47   // FIXME: Type predicate on pattern
48   if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
49     return false;
50 
51   Register Cond = MI.getOperand(1).getReg();
52   if (!MRI.hasOneNonDBGUse(Cond) ||
53       !mi_match(Cond, MRI,
54                 m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
55     return false;
56 
57   Info.True = MI.getOperand(2).getReg();
58   Info.False = MI.getOperand(3).getReg();
59 
60   if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
61       !(Info.LHS == Info.False && Info.RHS == Info.True))
62     return false;
63 
64   switch (Info.Pred) {
65   case CmpInst::FCMP_FALSE:
66   case CmpInst::FCMP_OEQ:
67   case CmpInst::FCMP_ONE:
68   case CmpInst::FCMP_ORD:
69   case CmpInst::FCMP_UNO:
70   case CmpInst::FCMP_UEQ:
71   case CmpInst::FCMP_UNE:
72   case CmpInst::FCMP_TRUE:
73     return false;
74   default:
75     return true;
76   }
77 }
78 
79 static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
80                                               const FMinFMaxLegacyInfo &Info) {
81 
82   auto buildNewInst = [&MI](unsigned Opc, Register X, Register Y) {
83     MachineIRBuilder MIB(MI);
84     MIB.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
85   };
86 
87   switch (Info.Pred) {
88   case CmpInst::FCMP_ULT:
89   case CmpInst::FCMP_ULE:
90     if (Info.LHS == Info.True)
91       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
92     else
93       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
94     break;
95   case CmpInst::FCMP_OLE:
96   case CmpInst::FCMP_OLT: {
97     // We need to permute the operands to get the correct NaN behavior. The
98     // selected operand is the second one based on the failing compare with NaN,
99     // so permute it based on the compare type the hardware uses.
100     if (Info.LHS == Info.True)
101       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
102     else
103       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
104     break;
105   }
106   case CmpInst::FCMP_UGE:
107   case CmpInst::FCMP_UGT: {
108     if (Info.LHS == Info.True)
109       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
110     else
111       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
112     break;
113   }
114   case CmpInst::FCMP_OGT:
115   case CmpInst::FCMP_OGE: {
116     if (Info.LHS == Info.True)
117       buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
118     else
119       buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
120     break;
121   }
122   default:
123     llvm_unreachable("predicate should not have matched");
124   }
125 
126   MI.eraseFromParent();
127 }
128 
129 
130 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
131 #include "AMDGPUGenGICombiner.inc"
132 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
133 
134 namespace {
135 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
136 #include "AMDGPUGenGICombiner.inc"
137 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
138 
139 class AMDGPUPreLegalizerCombinerInfo : public CombinerInfo {
140   GISelKnownBits *KB;
141   MachineDominatorTree *MDT;
142 
143 public:
144   AMDGPUGenPreLegalizerCombinerHelper Generated;
145 
146   AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
147                                   GISelKnownBits *KB, MachineDominatorTree *MDT)
148       : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
149                      /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
150         KB(KB), MDT(MDT) {
151     if (!Generated.parseCommandLineOption())
152       report_fatal_error("Invalid rule identifier");
153   }
154 
155   virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
156                        MachineIRBuilder &B) const override;
157 };
158 
159 bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
160                                               MachineInstr &MI,
161                                               MachineIRBuilder &B) const {
162   CombinerHelper Helper(Observer, B, KB, MDT);
163 
164   if (Generated.tryCombineAll(Observer, MI, B, Helper))
165     return true;
166 
167   switch (MI.getOpcode()) {
168   case TargetOpcode::G_SHL:
169   case TargetOpcode::G_LSHR:
170   case TargetOpcode::G_ASHR:
171     // On some subtargets, 64-bit shift is a quarter rate instruction. In the
172     // common case, splitting this into a move and a 32-bit shift is faster and
173     // the same code size.
174     return Helper.tryCombineShiftToUnmerge(MI, 32);
175   case TargetOpcode::G_CONCAT_VECTORS:
176     return Helper.tryCombineConcatVectors(MI);
177   case TargetOpcode::G_SHUFFLE_VECTOR:
178     return Helper.tryCombineShuffleVector(MI);
179   }
180 
181   return false;
182 }
183 
184 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
185 #include "AMDGPUGenGICombiner.inc"
186 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
187 
188 // Pass boilerplate
189 // ================
190 
191 class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
192 public:
193   static char ID;
194 
195   AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
196 
197   StringRef getPassName() const override { return "AMDGPUPreLegalizerCombiner"; }
198 
199   bool runOnMachineFunction(MachineFunction &MF) override;
200 
201   void getAnalysisUsage(AnalysisUsage &AU) const override;
202 private:
203   bool IsOptNone;
204 };
205 } // end anonymous namespace
206 
207 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
208   AU.addRequired<TargetPassConfig>();
209   AU.setPreservesCFG();
210   getSelectionDAGFallbackAnalysisUsage(AU);
211   AU.addRequired<GISelKnownBitsAnalysis>();
212   AU.addPreserved<GISelKnownBitsAnalysis>();
213   if (!IsOptNone) {
214     AU.addRequired<MachineDominatorTree>();
215     AU.addPreserved<MachineDominatorTree>();
216   }
217   MachineFunctionPass::getAnalysisUsage(AU);
218 }
219 
220 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
221   : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
222   initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
223 }
224 
225 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
226   if (MF.getProperties().hasProperty(
227           MachineFunctionProperties::Property::FailedISel))
228     return false;
229   auto *TPC = &getAnalysis<TargetPassConfig>();
230   const Function &F = MF.getFunction();
231   bool EnableOpt =
232       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
233   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
234   MachineDominatorTree *MDT =
235       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
236   AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
237                                         F.hasMinSize(), KB, MDT);
238   Combiner C(PCInfo, TPC);
239   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
240 }
241 
// Storage for the pass identifier that the constructor hands to the
// MachineFunctionPass base class.
char AMDGPUPreLegalizerCombiner::ID = 0;
// Pass registration under the DEBUG_TYPE name, declaring TargetPassConfig and
// GISelKnownBitsAnalysis as dependencies.
// NOTE(review): getAnalysisUsage also requires MachineDominatorTree when the
// pass is not created with IsOptNone, but it is not listed as a dependency
// here - confirm whether an entry is needed.
INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs before legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs before legalization", false,
                    false)
251 
252 namespace llvm {
253 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) {
254   return new AMDGPUPreLegalizerCombiner(IsOptNone);
255 }
256 } // end namespace llvm
257