1 //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass does combining of machine instructions at the generic MI level,
10 // before the legalizer.
11 //
12 //===----------------------------------------------------------------------===//
13 
#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <limits>
37 
38 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner"
39 
40 using namespace llvm;
41 using namespace MIPatternMatch;
42 
43 class AMDGPUPreLegalizerCombinerHelper {
44 protected:
45   MachineIRBuilder &B;
46   MachineFunction &MF;
47   MachineRegisterInfo &MRI;
48   CombinerHelper &Helper;
49 
50 public:
51   AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
52       : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){};
53 
54   struct ClampI64ToI16MatchInfo {
55     int64_t Cmp1;
56     int64_t Cmp2;
57     Register Origin;
58   };
59 
60   bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI,
61                           MachineFunction &MF,
62                           ClampI64ToI16MatchInfo &MatchInfo);
63 
64   void applyClampI64ToI16(MachineInstr &MI,
65                           const ClampI64ToI16MatchInfo &MatchInfo);
66 };
67 
68 bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16(
69     MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF,
70     ClampI64ToI16MatchInfo &MatchInfo) {
71   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
72 
73   // Try to find a pattern where an i64 value should get clamped to short.
74   const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());
75   if (SrcType != LLT::scalar(64))
76     return false;
77 
78   const LLT DstType = MRI.getType(MI.getOperand(0).getReg());
79   if (DstType != LLT::scalar(16))
80     return false;
81 
82   Register Base;
83 
84   // Try to match a combination of min / max MIR opcodes.
85   if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
86     if (!mi_match(Base, MRI, m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
87       return false;
88     }
89   }
90 
91   if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) {
92     if (!mi_match(Base, MRI, m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) {
93       return false;
94     }
95   }
96 
97   const auto Cmp1 = MatchInfo.Cmp1;
98   const auto Cmp2 = MatchInfo.Cmp2;
99   const auto Diff = std::abs(Cmp2 - Cmp1);
100 
101   // If the difference between both comparison values is 0 or 1, there is no
102   // need to clamp.
103   if (Diff == 0 || Diff == 1)
104     return false;
105 
106   const int64_t Min = std::numeric_limits<int16_t>::min();
107   const int64_t Max = std::numeric_limits<int16_t>::max();
108 
109   // Check if the comparison values are between SHORT_MIN and SHORT_MAX.
110   return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
111           (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
112 }
113 
114 // We want to find a combination of instructions that
115 // gets generated when an i64 gets clamped to i16.
116 // The corresponding pattern is:
117 // G_MAX / G_MAX for i16 <= G_TRUNC i64.
118 // This can be efficiently written as following:
119 // v_cvt_pk_i16_i32 v0, v0, v1
120 // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
121 void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16(
122     MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) {
123   MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
124 
125   Register Src = MatchInfo.Origin;
126   assert(MRI.getType(Src) == LLT::scalar(64));
127   const LLT S32 = LLT::scalar(32);
128 
129   B.setMBB(*MI.getParent());
130   B.setInstrAndDebugLoc(MI);
131 
132   auto Unmerge = B.buildUnmerge(S32, Src);
133 
134   assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32);
135 
136   const LLT V2S16 = LLT::vector(2, 16);
137   auto CvtPk = B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32,
138     {V2S16},
139     {Unmerge.getReg(0), Unmerge.getReg(1)},
140     MI.getFlags());
141 
142   auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
143   auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);
144   auto MinBoundaryDst = B.buildConstant(S32, MinBoundary);
145   auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary);
146 
147   auto Bitcast = B.buildBitcast({S32}, CvtPk);
148 
149   auto Med3 = B.buildInstr(AMDGPU::G_AMDGPU_MED3,
150     {S32},
151     {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)},
152     MI.getFlags());
153 
154   B.buildTrunc(MI.getOperand(0).getReg(), Med3);
155 
156   MI.eraseFromParent();
157 }
158 
159 class AMDGPUPreLegalizerCombinerHelperState {
160 protected:
161   CombinerHelper &Helper;
162   AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper;
163 
164 public:
165   AMDGPUPreLegalizerCombinerHelperState(
166       CombinerHelper &Helper,
167       AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper)
168       : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {}
169 };
170 
171 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
172 #include "AMDGPUGenPreLegalizeGICombiner.inc"
173 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
174 
175 namespace {
176 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
177 #include "AMDGPUGenPreLegalizeGICombiner.inc"
178 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
179 
180 class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo {
181   GISelKnownBits *KB;
182   MachineDominatorTree *MDT;
183 
184 public:
185   AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;
186 
187   AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
188                                   GISelKnownBits *KB, MachineDominatorTree *MDT)
189       : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false,
190                      /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize),
191         KB(KB), MDT(MDT) {
192     if (!GeneratedRuleCfg.parseCommandLineOption())
193       report_fatal_error("Invalid rule identifier");
194   }
195 
196   virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
197                        MachineIRBuilder &B) const override;
198 };
199 
200 bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
201                                               MachineInstr &MI,
202                                               MachineIRBuilder &B) const {
203   CombinerHelper Helper(Observer, B, KB, MDT);
204   AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper);
205   AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
206                                                  PreLegalizerHelper);
207 
208   if (Generated.tryCombineAll(Observer, MI, B, Helper))
209     return true;
210 
211   switch (MI.getOpcode()) {
212   case TargetOpcode::G_CONCAT_VECTORS:
213     return Helper.tryCombineConcatVectors(MI);
214   case TargetOpcode::G_SHUFFLE_VECTOR:
215     return Helper.tryCombineShuffleVector(MI);
216   }
217 
218   return false;
219 }
220 
221 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
222 #include "AMDGPUGenPreLegalizeGICombiner.inc"
223 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
224 
225 // Pass boilerplate
226 // ================
227 
228 class AMDGPUPreLegalizerCombiner : public MachineFunctionPass {
229 public:
230   static char ID;
231 
232   AMDGPUPreLegalizerCombiner(bool IsOptNone = false);
233 
234   StringRef getPassName() const override {
235     return "AMDGPUPreLegalizerCombiner";
236   }
237 
238   bool runOnMachineFunction(MachineFunction &MF) override;
239 
240   void getAnalysisUsage(AnalysisUsage &AU) const override;
241 private:
242   bool IsOptNone;
243 };
244 } // end anonymous namespace
245 
246 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
247   AU.addRequired<TargetPassConfig>();
248   AU.setPreservesCFG();
249   getSelectionDAGFallbackAnalysisUsage(AU);
250   AU.addRequired<GISelKnownBitsAnalysis>();
251   AU.addPreserved<GISelKnownBitsAnalysis>();
252   if (!IsOptNone) {
253     AU.addRequired<MachineDominatorTree>();
254     AU.addPreserved<MachineDominatorTree>();
255   }
256   MachineFunctionPass::getAnalysisUsage(AU);
257 }
258 
259 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone)
260   : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
261   initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry());
262 }
263 
264 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
265   if (MF.getProperties().hasProperty(
266           MachineFunctionProperties::Property::FailedISel))
267     return false;
268   auto *TPC = &getAnalysis<TargetPassConfig>();
269   const Function &F = MF.getFunction();
270   bool EnableOpt =
271       MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);
272 
273   GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
274   MachineDominatorTree *MDT =
275       IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
276   AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
277                                         F.hasMinSize(), KB, MDT);
278   Combiner C(PCInfo, TPC);
279   return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
280 }
281 
282 char AMDGPUPreLegalizerCombiner::ID = 0;
283 INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
284                       "Combine AMDGPU machine instrs before legalization",
285                       false, false)
286 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
287 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
288 INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE,
289                     "Combine AMDGPU machine instrs before legalization", false,
290                     false)
291 
292 namespace llvm {
293 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) {
294   return new AMDGPUPreLegalizerCombiner(IsOptNone);
295 }
296 } // end namespace llvm
297