1 //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass does combining of machine instructions at the generic MI level, 10 // before the legalizer. 11 // 12 //===----------------------------------------------------------------------===// 13 14 <<<<<<< HEAD 15 #include "AMDGPU.h" 16 ======= 17 #include "AMDGPULegalizerInfo.h" 18 #include "AMDGPUTargetMachine.h" 19 >>>>>>> Move Combiner to PreLegalize step 20 #include "llvm/CodeGen/GlobalISel/Combiner.h" 21 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 22 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 23 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 24 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 25 #include "llvm/CodeGen/MachineDominators.h" 26 #include "llvm/CodeGen/TargetPassConfig.h" 27 #include "llvm/Target/TargetMachine.h" 28 29 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner" 30 31 using namespace llvm; 32 using namespace MIPatternMatch; 33 34 class AMDGPUPreLegalizerCombinerHelper { 35 protected: 36 MachineIRBuilder &B; 37 MachineFunction &MF; 38 MachineRegisterInfo &MRI; 39 CombinerHelper &Helper; 40 41 public: 42 AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper) 43 : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){}; 44 45 struct ClampI64ToI16MatchInfo { 46 int64_t Cmp1; 47 int64_t Cmp2; 48 Register Origin; 49 }; 50 51 bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI, 52 MachineFunction &MF, 53 ClampI64ToI16MatchInfo &MatchInfo); 54 55 void applyClampI64ToI16(MachineInstr &MI, 56 const ClampI64ToI16MatchInfo &MatchInfo); 57 }; 58 59 bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16( 60 MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF, 61 ClampI64ToI16MatchInfo &MatchInfo) { 62 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!"); 63 64 // we want to check if a 64-bit number gets clamped to 16-bit boundaries (or 65 // below). 66 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg()); 67 68 if (SrcType != LLT::scalar(64)) 69 return false; 70 71 const LLT DstType = MRI.getType(MI.getOperand(0).getReg()); 72 73 if (DstType != LLT::scalar(16)) 74 return false; 75 76 MachineIRBuilder B(MI); 77 78 LLVM_DEBUG(dbgs() << "Matching Clamp i64 to i16\n"); 79 80 Register Base; 81 82 // match max / min pattern 83 if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 84 if (!mi_match(Base, MRI, m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 85 return false; 86 } 87 } 88 89 if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 90 if (!mi_match(Base, MRI, m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 91 return false; 92 } 93 } 94 95 const auto Cmp1 = MatchInfo.Cmp1; 96 const auto Cmp2 = MatchInfo.Cmp2; 97 const auto Diff = std::abs(Cmp2 - Cmp1); 98 99 // we don't need to clamp here. 100 if (Diff == 0 || Diff == 1) 101 return false; 102 103 const int64_t Min = std::numeric_limits<int16_t>::min(); 104 const int64_t Max = std::numeric_limits<int16_t>::max(); 105 106 // are we really trying to clamp against the relevant boundaries? 107 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) || 108 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min)); 109 } 110 111 // We want to find a combination of instructions that 112 // gets generated when an i64 gets clamped to i16. 113 // The corresponding pattern is: 114 // G_MAX / G_MAX for i16 <= G_TRUNC i64. 115 // This can be efficiently written as following: 116 // v_cvt_pk_i16_i32 v0, v0, v1 117 // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max 118 119 void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16( 120 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) { 121 LLVM_DEBUG(dbgs() << "Combining MI\n"); 122 123 MachineIRBuilder B(MI); 124 MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 125 126 Register Src = MatchInfo.Origin; 127 assert(MRI.getType(Src) == LLT::scalar(64)); 128 const LLT S32 = LLT::scalar(32); 129 130 auto Unmerge = B.buildUnmerge(S32, Src); 131 Register Hi32 = Unmerge->getOperand(0).getReg(); 132 Register Lo32 = Unmerge->getOperand(1).getReg(); 133 MRI.setRegClass(Hi32, &AMDGPU::VGPR_32RegClass); 134 MRI.setRegClass(Lo32, &AMDGPU::VGPR_32RegClass); 135 136 constexpr unsigned int CvtOpcode = AMDGPU::V_CVT_PK_I16_I32_e64; 137 assert(MI.getOpcode() != CvtOpcode); 138 139 const auto REG_CLASS = &AMDGPU::VGPR_32RegClass; 140 141 Register CvtDst = MRI.createVirtualRegister(REG_CLASS); 142 MRI.setType(CvtDst, S32); 143 144 auto CvtPk = B.buildInstr(CvtOpcode); 145 CvtPk.addDef(CvtDst); 146 CvtPk.addReg(Hi32); 147 CvtPk.addReg(Lo32); 148 CvtPk.setMIFlags(MI.getFlags()); 149 150 auto min = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2); 151 auto max = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2); 152 153 Register MinBoundaryDst = MRI.createVirtualRegister(REG_CLASS); 154 MRI.setType(MinBoundaryDst, S32); 155 B.buildConstant(MinBoundaryDst, min); 156 157 Register MaxBoundaryDst = MRI.createVirtualRegister(REG_CLASS); 158 MRI.setType(MaxBoundaryDst, S32); 159 B.buildConstant(MaxBoundaryDst, max); 160 161 Register MedDst = MRI.createVirtualRegister(REG_CLASS); 162 MRI.setType(MedDst, S32); 163 164 auto Med = B.buildInstr(AMDGPU::V_MED3_I32); 165 Med.addDef(MedDst); 166 Med.addReg(MinBoundaryDst); 167 Med.addReg(CvtDst); 168 Med.addReg(MaxBoundaryDst); 169 Med.setMIFlags(MI.getFlags()); 170 171 Register TruncDst = MRI.createGenericVirtualRegister(LLT::scalar(16)); 172 B.buildTrunc(TruncDst, MedDst); 173 B.buildCopy(MI.getOperand(0).getReg(), TruncDst); 174 175 MI.eraseFromParent(); 176 } 177 178 class AMDGPUPreLegalizerCombinerHelperState { 179 protected: 180 CombinerHelper &Helper; 181 AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper; 182 183 public: 184 AMDGPUPreLegalizerCombinerHelperState( 185 CombinerHelper &Helper, 186 AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper) 187 : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {} 188 }; 189 190 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 191 #include "AMDGPUGenPreLegalizeGICombiner.inc" 192 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 193 194 namespace { 195 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 196 #include "AMDGPUGenPreLegalizeGICombiner.inc" 197 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 198 199 class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo { 200 GISelKnownBits *KB; 201 MachineDominatorTree *MDT; 202 203 public: 204 AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; 205 206 AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, 207 const AMDGPULegalizerInfo *LI, 208 GISelKnownBits *KB, MachineDominatorTree *MDT) 209 : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, 210 /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), 211 KB(KB), MDT(MDT) { 212 if (!GeneratedRuleCfg.parseCommandLineOption()) 213 report_fatal_error("Invalid rule identifier"); 214 } 215 216 virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 217 MachineIRBuilder &B) const override; 218 }; 219 220 bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, 221 MachineInstr &MI, 222 MachineIRBuilder &B) const { 223 CombinerHelper Helper(Observer, B, KB, MDT); 224 AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper); 225 AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper, 226 PreLegalizerHelper); 227 228 if (Generated.tryCombineAll(Observer, MI, B, Helper)) 229 return true; 230 231 switch (MI.getOpcode()) { 232 case TargetOpcode::G_CONCAT_VECTORS: 233 return Helper.tryCombineConcatVectors(MI); 234 case TargetOpcode::G_SHUFFLE_VECTOR: 235 return Helper.tryCombineShuffleVector(MI); 236 } 237 238 return false; 239 } 240 241 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 242 #include "AMDGPUGenPreLegalizeGICombiner.inc" 243 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 244 245 // Pass boilerplate 246 // ================ 247 248 class AMDGPUPreLegalizerCombiner : public MachineFunctionPass { 249 public: 250 static char ID; 251 252 AMDGPUPreLegalizerCombiner(bool IsOptNone = false); 253 254 StringRef getPassName() const override { 255 return "AMDGPUPreLegalizerCombiner"; 256 } 257 258 bool runOnMachineFunction(MachineFunction &MF) override; 259 260 void getAnalysisUsage(AnalysisUsage &AU) const override; 261 private: 262 bool IsOptNone; 263 }; 264 } // end anonymous namespace 265 266 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 267 AU.addRequired<TargetPassConfig>(); 268 AU.setPreservesCFG(); 269 getSelectionDAGFallbackAnalysisUsage(AU); 270 AU.addRequired<GISelKnownBitsAnalysis>(); 271 AU.addPreserved<GISelKnownBitsAnalysis>(); 272 if (!IsOptNone) { 273 AU.addRequired<MachineDominatorTree>(); 274 AU.addPreserved<MachineDominatorTree>(); 275 } 276 MachineFunctionPass::getAnalysisUsage(AU); 277 } 278 279 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone) 280 : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 281 initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 282 } 283 284 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 285 if (MF.getProperties().hasProperty( 286 MachineFunctionProperties::Property::FailedISel)) 287 return false; 288 auto *TPC = &getAnalysis<TargetPassConfig>(); 289 const Function &F = MF.getFunction(); 290 bool EnableOpt = 291 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 292 293 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 294 const AMDGPULegalizerInfo *LI = 295 static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo()); 296 297 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 298 MachineDominatorTree *MDT = 299 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); 300 AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), 301 F.hasMinSize(), LI, KB, MDT); 302 Combiner C(PCInfo, TPC); 303 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); 304 } 305 306 char AMDGPUPreLegalizerCombiner::ID = 0; 307 INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 308 "Combine AMDGPU machine instrs before legalization", 309 false, false) 310 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 311 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 312 INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 313 "Combine AMDGPU machine instrs before legalization", false, 314 false) 315 316 namespace llvm { 317 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { 318 return new AMDGPUPreLegalizerCombiner(IsOptNone); 319 } 320 } // end namespace llvm 321