1 //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass does combining of machine instructions at the generic MI level, 10 // before the legalizer. 11 // 12 //===----------------------------------------------------------------------===// 13 14 <<<<<<< HEAD 15 <<<<<<< HEAD 16 #include "AMDGPU.h" 17 ======= 18 #include "AMDGPULegalizerInfo.h" 19 ======= 20 >>>>>>> Added and used new target pseudo for v_cvt_pk_i16_i32, changes due to code review. 21 #include "AMDGPUTargetMachine.h" 22 >>>>>>> Move Combiner to PreLegalize step 23 #include "llvm/CodeGen/GlobalISel/Combiner.h" 24 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 25 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 26 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 27 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 28 #include "llvm/CodeGen/MachineDominators.h" 29 #include "llvm/CodeGen/TargetPassConfig.h" 30 #include "llvm/Target/TargetMachine.h" 31 32 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner" 33 34 using namespace llvm; 35 using namespace MIPatternMatch; 36 37 class AMDGPUPreLegalizerCombinerHelper { 38 protected: 39 MachineIRBuilder &B; 40 MachineFunction &MF; 41 MachineRegisterInfo &MRI; 42 CombinerHelper &Helper; 43 44 public: 45 AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper) 46 : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){}; 47 48 struct ClampI64ToI16MatchInfo { 49 int64_t Cmp1; 50 int64_t Cmp2; 51 Register Origin; 52 }; 53 54 bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI, 55 MachineFunction &MF, 56 ClampI64ToI16MatchInfo &MatchInfo); 57 58 void applyClampI64ToI16(MachineInstr &MI, 59 const ClampI64ToI16MatchInfo &MatchInfo); 60 }; 61 62 bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16( 63 MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF, 64 ClampI64ToI16MatchInfo &MatchInfo) { 65 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!"); 66 67 // Try to find a pattern where an i64 value should get clamped to short. 68 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg()); 69 if (SrcType != LLT::scalar(64)) 70 return false; 71 72 const LLT DstType = MRI.getType(MI.getOperand(0).getReg()); 73 if (DstType != LLT::scalar(16)) 74 return false; 75 76 Register Base; 77 78 // Try to match a combination of min / max MIR opcodes. 79 if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 80 if (!mi_match(Base, MRI, m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 81 return false; 82 } 83 } 84 85 if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 86 if (!mi_match(Base, MRI, m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 87 return false; 88 } 89 } 90 91 const auto Cmp1 = MatchInfo.Cmp1; 92 const auto Cmp2 = MatchInfo.Cmp2; 93 const auto Diff = std::abs(Cmp2 - Cmp1); 94 95 // If the difference between both comparison values is 0 or 1, there is no 96 // need to clamp. 97 if (Diff == 0 || Diff == 1) 98 return false; 99 100 const int64_t Min = std::numeric_limits<int16_t>::min(); 101 const int64_t Max = std::numeric_limits<int16_t>::max(); 102 103 // Check if the comparison values are between SHORT_MIN and SHORT_MAX. 104 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) || 105 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min)); 106 } 107 108 // We want to find a combination of instructions that 109 // gets generated when an i64 gets clamped to i16. 110 // The corresponding pattern is: 111 // G_MAX / G_MAX for i16 <= G_TRUNC i64. 112 // This can be efficiently written as following: 113 // v_cvt_pk_i16_i32 v0, v0, v1 114 // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max 115 void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16( 116 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) { 117 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 118 119 Register Src = MatchInfo.Origin; 120 assert(MRI.getType(Src) == LLT::scalar(64)); 121 const LLT S32 = LLT::scalar(32); 122 123 B.setMBB(*MI.getParent()); 124 B.setInstrAndDebugLoc(MI); 125 126 auto Unmerge = B.buildUnmerge(S32, Src); 127 Register Hi32 = Unmerge.getReg(0); 128 Register Lo32 = Unmerge.getReg(1); 129 MRI.setRegClass(Hi32, &AMDGPU::VGPR_32RegClass); 130 MRI.setRegClass(Lo32, &AMDGPU::VGPR_32RegClass); 131 132 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32); 133 134 Register CvtDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 135 const LLT V2S16 = LLT::vector(2, 16); 136 MRI.setType(CvtDst, V2S16); 137 138 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, 139 {CvtDst}, 140 {Hi32, Lo32}, 141 MI.getFlags()); 142 143 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2); 144 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2); 145 146 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary); 147 MRI.setRegClass(MinBoundaryDst.getReg(0), &AMDGPU::VGPR_32RegClass); 148 149 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary); 150 MRI.setRegClass(MaxBoundaryDst.getReg(0), &AMDGPU::VGPR_32RegClass); 151 152 Register MedDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass); 153 MRI.setType(MedDst, S32); 154 155 B.buildInstr(AMDGPU::V_MED3_I32, 156 {MedDst}, 157 {MinBoundaryDst.getReg(0), CvtDst, MaxBoundaryDst.getReg(0)}, 158 MI.getFlags()); 159 160 Register TruncDst = MRI.createGenericVirtualRegister(LLT::scalar(16)); 161 B.buildTrunc(TruncDst, MedDst); 162 B.buildCopy(MI.getOperand(0).getReg(), TruncDst); 163 164 MI.eraseFromParent(); 165 } 166 167 class AMDGPUPreLegalizerCombinerHelperState { 168 protected: 169 CombinerHelper &Helper; 170 AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper; 171 172 public: 173 AMDGPUPreLegalizerCombinerHelperState( 174 CombinerHelper &Helper, 175 AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper) 176 : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {} 177 }; 178 179 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 180 #include "AMDGPUGenPreLegalizeGICombiner.inc" 181 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 182 183 namespace { 184 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 185 #include "AMDGPUGenPreLegalizeGICombiner.inc" 186 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 187 188 class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo { 189 GISelKnownBits *KB; 190 MachineDominatorTree *MDT; 191 192 public: 193 AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; 194 195 AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, 196 GISelKnownBits *KB, MachineDominatorTree *MDT) 197 : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, 198 /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize), 199 KB(KB), MDT(MDT) { 200 if (!GeneratedRuleCfg.parseCommandLineOption()) 201 report_fatal_error("Invalid rule identifier"); 202 } 203 204 virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 205 MachineIRBuilder &B) const override; 206 }; 207 208 bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, 209 MachineInstr &MI, 210 MachineIRBuilder &B) const { 211 CombinerHelper Helper(Observer, B, KB, MDT); 212 AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper); 213 AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper, 214 PreLegalizerHelper); 215 216 if (Generated.tryCombineAll(Observer, MI, B, Helper)) 217 return true; 218 219 switch (MI.getOpcode()) { 220 case TargetOpcode::G_CONCAT_VECTORS: 221 return Helper.tryCombineConcatVectors(MI); 222 case TargetOpcode::G_SHUFFLE_VECTOR: 223 return Helper.tryCombineShuffleVector(MI); 224 } 225 226 return false; 227 } 228 229 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 230 #include "AMDGPUGenPreLegalizeGICombiner.inc" 231 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 232 233 // Pass boilerplate 234 // ================ 235 236 class AMDGPUPreLegalizerCombiner : public MachineFunctionPass { 237 public: 238 static char ID; 239 240 AMDGPUPreLegalizerCombiner(bool IsOptNone = false); 241 242 StringRef getPassName() const override { 243 return "AMDGPUPreLegalizerCombiner"; 244 } 245 246 bool runOnMachineFunction(MachineFunction &MF) override; 247 248 void getAnalysisUsage(AnalysisUsage &AU) const override; 249 private: 250 bool IsOptNone; 251 }; 252 } // end anonymous namespace 253 254 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 255 AU.addRequired<TargetPassConfig>(); 256 AU.setPreservesCFG(); 257 getSelectionDAGFallbackAnalysisUsage(AU); 258 AU.addRequired<GISelKnownBitsAnalysis>(); 259 AU.addPreserved<GISelKnownBitsAnalysis>(); 260 if (!IsOptNone) { 261 AU.addRequired<MachineDominatorTree>(); 262 AU.addPreserved<MachineDominatorTree>(); 263 } 264 MachineFunctionPass::getAnalysisUsage(AU); 265 } 266 267 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone) 268 : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 269 initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 270 } 271 272 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 273 if (MF.getProperties().hasProperty( 274 MachineFunctionProperties::Property::FailedISel)) 275 return false; 276 auto *TPC = &getAnalysis<TargetPassConfig>(); 277 const Function &F = MF.getFunction(); 278 bool EnableOpt = 279 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 280 281 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 282 MachineDominatorTree *MDT = 283 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); 284 AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), 285 F.hasMinSize(), KB, MDT); 286 Combiner C(PCInfo, TPC); 287 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); 288 } 289 290 char AMDGPUPreLegalizerCombiner::ID = 0; 291 INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 292 "Combine AMDGPU machine instrs before legalization", 293 false, false) 294 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 295 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 296 INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 297 "Combine AMDGPU machine instrs before legalization", false, 298 false) 299 300 namespace llvm { 301 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { 302 return new AMDGPUPreLegalizerCombiner(IsOptNone); 303 } 304 } // end namespace llvm 305