1 //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // before the legalizer.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "AMDGPULegalizerInfo.h"
16 #include "GCNSubtarget.h"
17 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
18 #include "llvm/CodeGen/GlobalISel/Combiner.h"
19 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
20 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
21 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
22 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
23 #include "llvm/CodeGen/MachineDominators.h"
24 #include "llvm/CodeGen/TargetPassConfig.h"
25 #include "llvm/Target/TargetMachine.h"
26 
27 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
28 
29 using namespace llvm;
30 using namespace MIPatternMatch;
31 
32 class AMDGPUPreLegalizerCombinerHelper {
33 protected:
34   MachineIRBuilder &B;
35   MachineFunction &MF;
36   MachineRegisterInfo &MRI;
37   CombinerHelper &Helper;
38 
39 public:
40   AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
41       : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
42 
43   struct ClampI64ToI16MatchInfo {
44     int64_t Cmp1;
45     int64_t Cmp2;
46     Register Origin;
47   };
48 
49   bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI,
50                           MachineFunction &MF,
51                           ClampI64ToI16MatchInfo &MatchInfo);
52 
53   void applyClampI64ToI16(MachineInstr &MI,
54                           const ClampI64ToI16MatchInfo &MatchInfo);
55 };
56 
57 bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16(
58     MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF,
59     ClampI64ToI16MatchInfo &MatchInfo) {
60   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
61 
62   // Try to find a pattern where an i64 value should get clamped to short.
63   const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
64   if (SrcType != LLT::scalar(64))
65     return false;
66 
67   const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
68   if (DstType != LLT::scalar(16))
69     return false;
70 
71   Register Base;
72 
73   // Try to match a combination of min / max MIR opcodes.
74   if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
75     if (!mi_match(Base, MRI, m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
76       return false;
77     }
78   }
79 
80   if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
81     if (!mi_match(Base, MRI, m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
82       return false;
83     }
84   }
85 
86   const auto Cmp1 = MatchInfo.Cmp1;
87   const auto Cmp2 = MatchInfo.Cmp2;
88   const auto Diff = std::abs(Cmp2 - Cmp1);
89 
90   // If the difference between both comparison values is 0 or 1, there is no
91   // need to clamp.
92   if (Diff == 0 || Diff == 1)
93     return false;
94 
95   const int64_t Min = std::numeric_limits<int16_t>::min();
96   const int64_t Max = std::numeric_limits<int16_t>::max();
97 
98   // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
99   return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
100           (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
101 }
102 
103 // We want to find a combination of instructions that
104 // gets generated when an i64 gets clamped to i16.
105 // The corresponding pattern is:
106 // G_MAX / G_MAX for i16 <= G_TRUNC i64.
107 // This can be efficiently written as following:
108 // v_cvt_pk_i16_i32 v0, v0, v1
109 // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
110 void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
111     MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) {
112 
113   Register Src = MatchInfo.Origin;
114   assert(MI.getParent()->getParent()->getRegInfo().getType(Src) ==
115          LLT::scalar(64));
116   const LLT S32 = LLT::scalar(32);
117 
118   B.setMBB(*MI.getParent());
119   B.setInstrAndDebugLoc(MI);
120 
121   auto Unmerge = B.buildUnmerge(S32, Src);
122 
123   assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
124 
125   const LLT V2S16 = LLT::vector(2, 16);
126   auto CvtPk = B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32,
127     {V2S16},
128     {Unmerge.getReg(0), Unmerge.getReg(1)},
129     MI.getFlags());
130 
131   auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
132   auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
133   auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
134   auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
135 
136   auto Bitcast = B.buildBitcast({S32}, CvtPk);
137 
138   auto Med3 = B.buildInstr(AMDGPU::G_AMDGPU_MED3,
139     {S32},
140     {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
141     MI.getFlags());
142 
143   B.buildTrunc(MI.getOperand(0).getReg(), Med3);
144 
145   MI.eraseFromParent();
146 }
147 
148 class AMDGPUPreLegalizerCombinerHelperState {
149 protected:
150   CombinerHelper &Helper;
151   AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper;
152 
153 public:
154   AMDGPUPreLegalizerCombinerHelperState(
155       CombinerHelper &Helper,
156       AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
157       : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {}
158 };
159 
160 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
161 #include "AMDGPUGenPreLegalizeGICombiner.inc"
162 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
163 
164 namespace {
165 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
166 #include "AMDGPUGenPreLegalizeGICombiner.inc"
167 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
168 
169 class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo {
170   GISelKnownBits *KB;
171   MachineDominatorTree *MDT;
172 
173 public:
174   AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
175 
176   AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
177                                   GISelKnownBits *KB, MachineDominatorTree *MDT)
178       : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
179                      /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
180         KB(KB), MDT(MDT) {
181     if (!GeneratedRuleCfg.parseCommandLineOption())
182       report_fatal_error("Invalid rule identifier");
183   }
184 
185   virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
186                        MachineIRBuilder &B) const override;
187 };
188 
189 bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
190                                               MachineInstr &MI,
191                                               MachineIRBuilder &B) const {
192   CombinerHelper Helper(Observer, B, KB, MDT);
193   AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper);
194   AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
195                                                  PreLegalizerHelper);
196 
197   if (Generated.tryCombineAll(Observer, MI, B, Helper))
198     return true;
199 
200   switch (MI.getOpcode()) {
201   case TargetOpcode::G_CONCAT_VECTORS:
202     return Helper.tryCombineConcatVectors(MI);
203   case TargetOpcode::G_SHUFFLE_VECTOR:
204     return Helper.tryCombineShuffleVector(MI);
205   }
206 
207   return false;
208 }
209 
210 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
211 #include "AMDGPUGenPreLegalizeGICombiner.inc"
212 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
213 
214 // Pass boilerplate
215 // ================
216 
217 class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
218 public:
219   static char ID;
220 
221   AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
222 
223   StringRef getPassName() const override {
224     return "AMDGPUPreLegalizerCombiner";
225   }
226 
227   bool runOnMachineFunction(MachineFunction &MF) override;
228 
229   void getAnalysisUsage(AnalysisUsage &AU) const override;
230 private:
231   bool IsOptNone;
232 };
233 } // end anonymous namespace
234 
235 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
236   AU.addRequired<TargetPassConfig>();
237   AU.setPreservesCFG();
238   getSelectionDAGFallbackAnalysisUsage(AU);
239   AU.addRequired<GISelKnownBitsAnalysis>();
240   AU.addPreserved<GISelKnownBitsAnalysis>();
241   if (!IsOptNone) {
242     AU.addRequired<MachineDominatorTree>();
243     AU.addPreserved<MachineDominatorTree>();
244   }
245   MachineFunctionPass::getAnalysisUsage(AU);
246 }
247 
248 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
249   : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
250   initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
251 }
252 
253 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
254   if (MF.getProperties().hasProperty(
255           MachineFunctionProperties::Property::FailedISel))
256     return false;
257   auto *TPC = &getAnalysis<TargetPassConfig>();
258   const Function &F = MF.getFunction();
259   bool EnableOpt =
260       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
261 
262   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
263   MachineDominatorTree *MDT =
264       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
265   AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
266                                         F.hasMinSize(), KB, MDT);
267   Combiner C(PCInfo, TPC);
268   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
269 }
270 
// Definition of the static pass ID that the constructor passes to
// MachineFunctionPass.
char AMDGPUPreLegalizerCombiner::ID = 0;
// Pass registration under the DEBUG_TYPE name, with TargetPassConfig and
// GISelKnownBitsAnalysis listed as dependencies.
// NOTE(review): getAnalysisUsage also requires MachineDominatorTree when
// IsOptNone is false, but it is not listed as a dependency here; confirm
// whether that is intentional.
INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs before legalization",
                      false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs before legalization", false,
                    false)
280 
281 namespace llvm {
282 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) {
283   return new AMDGPUPreLegalizerCombiner(IsOptNone);
284 }
285 } // end namespace llvm
286