1 //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass does combining of machine instructions at the generic MI level, 10 // before the legalizer. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "AMDGPULegalizerInfo.h" 16 #include "GCNSubtarget.h" 17 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 18 #include "llvm/CodeGen/GlobalISel/Combiner.h" 19 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 20 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 21 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 22 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 23 #include "llvm/CodeGen/MachineDominators.h" 24 #include "llvm/CodeGen/TargetPassConfig.h" 25 #include "llvm/Target/TargetMachine.h" 26 27 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner" 28 29 using namespace llvm; 30 using namespace MIPatternMatch; 31 32 class AMDGPUPreLegalizerCombinerHelper { 33 protected: 34 MachineIRBuilder &B; 35 MachineFunction &MF; 36 MachineRegisterInfo &MRI; 37 CombinerHelper &Helper; 38 39 public: 40 AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper) 41 : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){}; 42 43 struct ClampI64ToI16MatchInfo { 44 int64_t Cmp1; 45 int64_t Cmp2; 46 Register Origin; 47 }; 48 49 bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI, 50 MachineFunction &MF, 51 ClampI64ToI16MatchInfo &MatchInfo); 52 53 void applyClampI64ToI16(MachineInstr &MI, 54 const ClampI64ToI16MatchInfo &MatchInfo); 55 }; 56 57 bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16( 58 MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF, 59 ClampI64ToI16MatchInfo &MatchInfo) { 60 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!"); 61 62 // Try to find a pattern where an i64 value should get clamped to short. 63 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg()); 64 if (SrcType != LLT::scalar(64)) 65 return false; 66 67 const LLT DstType = MRI.getType(MI.getOperand(0).getReg()); 68 if (DstType != LLT::scalar(16)) 69 return false; 70 71 Register Base; 72 73 // Try to match a combination of min / max MIR opcodes. 74 if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 75 if (!mi_match(Base, MRI, m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 76 return false; 77 } 78 } 79 80 if (mi_match(MI.getOperand(1).getReg(), MRI, m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 81 if (!mi_match(Base, MRI, m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 82 return false; 83 } 84 } 85 86 const auto Cmp1 = MatchInfo.Cmp1; 87 const auto Cmp2 = MatchInfo.Cmp2; 88 const auto Diff = std::abs(Cmp2 - Cmp1); 89 90 // If the difference between both comparison values is 0 or 1, there is no 91 // need to clamp. 92 if (Diff == 0 || Diff == 1) 93 return false; 94 95 const int64_t Min = std::numeric_limits<int16_t>::min(); 96 const int64_t Max = std::numeric_limits<int16_t>::max(); 97 98 // Check if the comparison values are between SHORT_MIN and SHORT_MAX. 99 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) || 100 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min)); 101 } 102 103 // We want to find a combination of instructions that 104 // gets generated when an i64 gets clamped to i16. 105 // The corresponding pattern is: 106 // G_MAX / G_MAX for i16 <= G_TRUNC i64. 107 // This can be efficiently written as following: 108 // v_cvt_pk_i16_i32 v0, v0, v1 109 // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max 110 void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16( 111 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) { 112 113 Register Src = MatchInfo.Origin; 114 assert(MI.getParent()->getParent()->getRegInfo().getType(Src) == 115 LLT::scalar(64)); 116 const LLT S32 = LLT::scalar(32); 117 118 B.setMBB(*MI.getParent()); 119 B.setInstrAndDebugLoc(MI); 120 121 auto Unmerge = B.buildUnmerge(S32, Src); 122 123 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32); 124 125 const LLT V2S16 = LLT::vector(2, 16); 126 auto CvtPk = B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, 127 {V2S16}, 128 {Unmerge.getReg(0), Unmerge.getReg(1)}, 129 MI.getFlags()); 130 131 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2); 132 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2); 133 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary); 134 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary); 135 136 auto Bitcast = B.buildBitcast({S32}, CvtPk); 137 138 auto Med3 = B.buildInstr(AMDGPU::G_AMDGPU_MED3, 139 {S32}, 140 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)}, 141 MI.getFlags()); 142 143 B.buildTrunc(MI.getOperand(0).getReg(), Med3); 144 145 MI.eraseFromParent(); 146 } 147 148 class AMDGPUPreLegalizerCombinerHelperState { 149 protected: 150 CombinerHelper &Helper; 151 AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper; 152 153 public: 154 AMDGPUPreLegalizerCombinerHelperState( 155 CombinerHelper &Helper, 156 AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper) 157 : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {} 158 }; 159 160 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 161 #include "AMDGPUGenPreLegalizeGICombiner.inc" 162 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 163 164 namespace { 165 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 166 #include "AMDGPUGenPreLegalizeGICombiner.inc" 167 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 168 169 class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo { 170 GISelKnownBits *KB; 171 MachineDominatorTree *MDT; 172 173 public: 174 AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; 175 176 AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, 177 GISelKnownBits *KB, MachineDominatorTree *MDT) 178 : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, 179 /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize), 180 KB(KB), MDT(MDT) { 181 if (!GeneratedRuleCfg.parseCommandLineOption()) 182 report_fatal_error("Invalid rule identifier"); 183 } 184 185 virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 186 MachineIRBuilder &B) const override; 187 }; 188 189 bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, 190 MachineInstr &MI, 191 MachineIRBuilder &B) const { 192 CombinerHelper Helper(Observer, B, KB, MDT); 193 AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper); 194 AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper, 195 PreLegalizerHelper); 196 197 if (Generated.tryCombineAll(Observer, MI, B, Helper)) 198 return true; 199 200 switch (MI.getOpcode()) { 201 case TargetOpcode::G_CONCAT_VECTORS: 202 return Helper.tryCombineConcatVectors(MI); 203 case TargetOpcode::G_SHUFFLE_VECTOR: 204 return Helper.tryCombineShuffleVector(MI); 205 } 206 207 return false; 208 } 209 210 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 211 #include "AMDGPUGenPreLegalizeGICombiner.inc" 212 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 213 214 // Pass boilerplate 215 // ================ 216 217 class AMDGPUPreLegalizerCombiner : public MachineFunctionPass { 218 public: 219 static char ID; 220 221 AMDGPUPreLegalizerCombiner(bool IsOptNone = false); 222 223 StringRef getPassName() const override { 224 return "AMDGPUPreLegalizerCombiner"; 225 } 226 227 bool runOnMachineFunction(MachineFunction &MF) override; 228 229 void getAnalysisUsage(AnalysisUsage &AU) const override; 230 private: 231 bool IsOptNone; 232 }; 233 } // end anonymous namespace 234 235 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 236 AU.addRequired<TargetPassConfig>(); 237 AU.setPreservesCFG(); 238 getSelectionDAGFallbackAnalysisUsage(AU); 239 AU.addRequired<GISelKnownBitsAnalysis>(); 240 AU.addPreserved<GISelKnownBitsAnalysis>(); 241 if (!IsOptNone) { 242 AU.addRequired<MachineDominatorTree>(); 243 AU.addPreserved<MachineDominatorTree>(); 244 } 245 MachineFunctionPass::getAnalysisUsage(AU); 246 } 247 248 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone) 249 : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 250 initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 251 } 252 253 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 254 if (MF.getProperties().hasProperty( 255 MachineFunctionProperties::Property::FailedISel)) 256 return false; 257 auto *TPC = &getAnalysis<TargetPassConfig>(); 258 const Function &F = MF.getFunction(); 259 bool EnableOpt = 260 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 261 262 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 263 MachineDominatorTree *MDT = 264 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); 265 AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), 266 F.hasMinSize(), KB, MDT); 267 Combiner C(PCInfo, TPC); 268 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); 269 } 270 271 char AMDGPUPreLegalizerCombiner::ID = 0; 272 INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 273 "Combine AMDGPU machine instrs before legalization", 274 false, false) 275 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 276 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 277 INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 278 "Combine AMDGPU machine instrs before legalization", false, 279 false) 280 281 namespace llvm { 282 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { 283 return new AMDGPUPreLegalizerCombiner(IsOptNone); 284 } 285 } // end namespace llvm 286