//=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// before the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
29 #include "llvm/CodeGen/GlobalISel/Combiner.h" 30 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 31 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 32 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 33 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 34 #include "llvm/CodeGen/MachineDominators.h" 35 #include "llvm/CodeGen/TargetPassConfig.h" 36 #include "llvm/Target/TargetMachine.h" 37 38 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner" 39 40 using namespace llvm; 41 using namespace MIPatternMatch; 42 43 class AMDGPUPreLegalizerCombinerHelper { 44 protected: 45 MachineIRBuilder &B; 46 MachineFunction &MF; 47 MachineRegisterInfo &MRI; 48 CombinerHelper &Helper; 49 50 public: 51 AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper) 52 : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){}; 53 54 struct ClampI64ToI16MatchInfo { 55 int64_t Cmp1; 56 int64_t Cmp2; 57 Register Origin; 58 }; 59 60 bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI, 61 MachineFunction &MF, 62 ClampI64ToI16MatchInfo &MatchInfo); 63 64 void applyClampI64ToI16(MachineInstr &MI, 65 const ClampI64ToI16MatchInfo &MatchInfo); 66 }; 67 68 bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16( 69 MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF, 70 ClampI64ToI16MatchInfo &MatchInfo) { 71 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!"); 72 73 // Try to find a pattern where an i64 value should get clamped to short. 74 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg()); 75 if (SrcType != LLT::scalar(64)) 76 return false; 77 78 const LLT DstType = MRI.getType(MI.getOperand(0).getReg()); 79 if (DstType != LLT::scalar(16)) 80 return false; 81 82 Register Base; 83 84 // Try to match a combination of min / max MIR opcodes. 
85 if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 86 if (!mi_match(Base, MRI, m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 87 return false; 88 } 89 } 90 91 if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 92 if (!mi_match(Base, MRI, m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 93 return false; 94 } 95 } 96 97 const auto Cmp1 = MatchInfo.Cmp1; 98 const auto Cmp2 = MatchInfo.Cmp2; 99 const auto Diff = std::abs(Cmp2 - Cmp1); 100 101 // If the difference between both comparison values is 0 or 1, there is no 102 // need to clamp. 103 if (Diff == 0 || Diff == 1) 104 return false; 105 106 const int64_t Min = std::numeric_limits<int16_t>::min(); 107 const int64_t Max = std::numeric_limits<int16_t>::max(); 108 109 // Check if the comparison values are between SHORT_MIN and SHORT_MAX. 110 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) || 111 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min)); 112 } 113 114 // We want to find a combination of instructions that 115 // gets generated when an i64 gets clamped to i16. 116 // The corresponding pattern is: 117 // G_MAX / G_MAX for i16 <= G_TRUNC i64. 
118 // This can be efficiently written as following: 119 // v_cvt_pk_i16_i32 v0, v0, v1 120 // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max 121 void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16( 122 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) { 123 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 124 125 Register Src = MatchInfo.Origin; 126 assert(MRI.getType(Src) == LLT::scalar(64)); 127 const LLT S32 = LLT::scalar(32); 128 129 B.setMBB(*MI.getParent()); 130 B.setInstrAndDebugLoc(MI); 131 132 auto Unmerge = B.buildUnmerge(S32, Src); 133 134 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32); 135 136 const LLT V2S16 = LLT::vector(2, 16); 137 auto CvtPk = B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, 138 {V2S16}, 139 {Unmerge.getReg(0), Unmerge.getReg(1)}, 140 MI.getFlags()); 141 142 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2); 143 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2); 144 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary); 145 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary); 146 147 auto Bitcast = B.buildBitcast({S32}, CvtPk); 148 149 auto Med3 = B.buildInstr(AMDGPU::G_AMDGPU_MED3, 150 {S32}, 151 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)}, 152 MI.getFlags()); 153 154 B.buildTrunc(MI.getOperand(0).getReg(), Med3); 155 156 MI.eraseFromParent(); 157 } 158 159 class AMDGPUPreLegalizerCombinerHelperState { 160 protected: 161 CombinerHelper &Helper; 162 AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper; 163 164 public: 165 AMDGPUPreLegalizerCombinerHelperState( 166 CombinerHelper &Helper, 167 AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper) 168 : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {} 169 }; 170 171 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 172 #include "AMDGPUGenPreLegalizeGICombiner.inc" 173 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 174 175 namespace { 176 #define 
AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 177 #include "AMDGPUGenPreLegalizeGICombiner.inc" 178 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 179 180 class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo { 181 GISelKnownBits *KB; 182 MachineDominatorTree *MDT; 183 184 public: 185 AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; 186 187 AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, 188 GISelKnownBits *KB, MachineDominatorTree *MDT) 189 : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, 190 /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize), 191 KB(KB), MDT(MDT) { 192 if (!GeneratedRuleCfg.parseCommandLineOption()) 193 report_fatal_error("Invalid rule identifier"); 194 } 195 196 virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 197 MachineIRBuilder &B) const override; 198 }; 199 200 bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, 201 MachineInstr &MI, 202 MachineIRBuilder &B) const { 203 CombinerHelper Helper(Observer, B, KB, MDT); 204 AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper); 205 AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper, 206 PreLegalizerHelper); 207 208 if (Generated.tryCombineAll(Observer, MI, B, Helper)) 209 return true; 210 211 switch (MI.getOpcode()) { 212 case TargetOpcode::G_CONCAT_VECTORS: 213 return Helper.tryCombineConcatVectors(MI); 214 case TargetOpcode::G_SHUFFLE_VECTOR: 215 return Helper.tryCombineShuffleVector(MI); 216 } 217 218 return false; 219 } 220 221 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 222 #include "AMDGPUGenPreLegalizeGICombiner.inc" 223 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 224 225 // Pass boilerplate 226 // ================ 227 228 class AMDGPUPreLegalizerCombiner : public MachineFunctionPass { 229 public: 230 static char ID; 231 232 AMDGPUPreLegalizerCombiner(bool IsOptNone = false); 233 
234 StringRef getPassName() const override { 235 return "AMDGPUPreLegalizerCombiner"; 236 } 237 238 bool runOnMachineFunction(MachineFunction &MF) override; 239 240 void getAnalysisUsage(AnalysisUsage &AU) const override; 241 private: 242 bool IsOptNone; 243 }; 244 } // end anonymous namespace 245 246 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 247 AU.addRequired<TargetPassConfig>(); 248 AU.setPreservesCFG(); 249 getSelectionDAGFallbackAnalysisUsage(AU); 250 AU.addRequired<GISelKnownBitsAnalysis>(); 251 AU.addPreserved<GISelKnownBitsAnalysis>(); 252 if (!IsOptNone) { 253 AU.addRequired<MachineDominatorTree>(); 254 AU.addPreserved<MachineDominatorTree>(); 255 } 256 MachineFunctionPass::getAnalysisUsage(AU); 257 } 258 259 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone) 260 : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 261 initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 262 } 263 264 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 265 if (MF.getProperties().hasProperty( 266 MachineFunctionProperties::Property::FailedISel)) 267 return false; 268 auto *TPC = &getAnalysis<TargetPassConfig>(); 269 const Function &F = MF.getFunction(); 270 bool EnableOpt = 271 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 272 273 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 274 MachineDominatorTree *MDT = 275 IsOptNone ? 
nullptr : &getAnalysis<MachineDominatorTree>(); 276 AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), 277 F.hasMinSize(), KB, MDT); 278 Combiner C(PCInfo, TPC); 279 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); 280 } 281 282 char AMDGPUPreLegalizerCombiner::ID = 0; 283 INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 284 "Combine AMDGPU machine instrs before legalization", 285 false, false) 286 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 287 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 288 INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 289 "Combine AMDGPU machine instrs before legalization", false, 290 false) 291 292 namespace llvm { 293 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { 294 return new AMDGPUPreLegalizerCombiner(IsOptNone); 295 } 296 } // end namespace llvm 297