1 //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass does combining of machine instructions at the generic MI level, 10 // before the legalizer. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "AMDGPUCombinerHelper.h" 16 #include "AMDGPULegalizerInfo.h" 17 #include "GCNSubtarget.h" 18 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 19 #include "llvm/CodeGen/GlobalISel/CSEInfo.h" 20 #include "llvm/CodeGen/GlobalISel/Combiner.h" 21 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 22 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 23 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 24 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 25 #include "llvm/CodeGen/MachineDominators.h" 26 #include "llvm/CodeGen/TargetPassConfig.h" 27 #include "llvm/Target/TargetMachine.h" 28 29 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner" 30 31 using namespace llvm; 32 using namespace MIPatternMatch; 33 34 class AMDGPUPreLegalizerCombinerHelper { 35 protected: 36 MachineIRBuilder &B; 37 MachineFunction &MF; 38 MachineRegisterInfo &MRI; 39 AMDGPUCombinerHelper &Helper; 40 41 public: 42 AMDGPUPreLegalizerCombinerHelper(MachineIRBuilder &B, 43 AMDGPUCombinerHelper &Helper) 44 : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper){}; 45 46 struct ClampI64ToI16MatchInfo { 47 int64_t Cmp1 = 0; 48 int64_t Cmp2 = 0; 49 Register Origin; 50 }; 51 52 bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI, 53 MachineFunction &MF, 54 ClampI64ToI16MatchInfo &MatchInfo); 55 56 void applyClampI64ToI16(MachineInstr &MI, 57 const ClampI64ToI16MatchInfo &MatchInfo); 58 }; 59 60 bool AMDGPUPreLegalizerCombinerHelper::matchClampI64ToI16( 61 MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF, 62 ClampI64ToI16MatchInfo &MatchInfo) { 63 assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!"); 64 65 // Try to find a pattern where an i64 value should get clamped to short. 66 const LLT SrcType = MRI.getType(MI.getOperand(1).getReg()); 67 if (SrcType != LLT::scalar(64)) 68 return false; 69 70 const LLT DstType = MRI.getType(MI.getOperand(0).getReg()); 71 if (DstType != LLT::scalar(16)) 72 return false; 73 74 Register Base; 75 76 auto IsApplicableForCombine = [&MatchInfo]() -> bool { 77 const auto Cmp1 = MatchInfo.Cmp1; 78 const auto Cmp2 = MatchInfo.Cmp2; 79 const auto Diff = std::abs(Cmp2 - Cmp1); 80 81 // If the difference between both comparison values is 0 or 1, there is no 82 // need to clamp. 83 if (Diff == 0 || Diff == 1) 84 return false; 85 86 const int64_t Min = std::numeric_limits<int16_t>::min(); 87 const int64_t Max = std::numeric_limits<int16_t>::max(); 88 89 // Check if the comparison values are between SHORT_MIN and SHORT_MAX. 90 return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) || 91 (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min)); 92 }; 93 94 // Try to match a combination of min / max MIR opcodes. 95 if (mi_match(MI.getOperand(1).getReg(), MRI, 96 m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 97 if (mi_match(Base, MRI, 98 m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 99 return IsApplicableForCombine(); 100 } 101 } 102 103 if (mi_match(MI.getOperand(1).getReg(), MRI, 104 m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { 105 if (mi_match(Base, MRI, 106 m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { 107 return IsApplicableForCombine(); 108 } 109 } 110 111 return false; 112 } 113 114 // We want to find a combination of instructions that 115 // gets generated when an i64 gets clamped to i16. 116 // The corresponding pattern is: 117 // G_MAX / G_MAX for i16 <= G_TRUNC i64. 118 // This can be efficiently written as following: 119 // v_cvt_pk_i16_i32 v0, v0, v1 120 // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max 121 void AMDGPUPreLegalizerCombinerHelper::applyClampI64ToI16( 122 MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) { 123 124 Register Src = MatchInfo.Origin; 125 assert(MI.getParent()->getParent()->getRegInfo().getType(Src) == 126 LLT::scalar(64)); 127 const LLT S32 = LLT::scalar(32); 128 129 B.setMBB(*MI.getParent()); 130 B.setInstrAndDebugLoc(MI); 131 132 auto Unmerge = B.buildUnmerge(S32, Src); 133 134 assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32); 135 136 const LLT V2S16 = LLT::fixed_vector(2, 16); 137 auto CvtPk = 138 B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16}, 139 {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags()); 140 141 auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2); 142 auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2); 143 auto MinBoundaryDst = B.buildConstant(S32, MinBoundary); 144 auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary); 145 146 auto Bitcast = B.buildBitcast({S32}, CvtPk); 147 148 auto Med3 = B.buildInstr( 149 AMDGPU::G_AMDGPU_SMED3, {S32}, 150 {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)}, 151 MI.getFlags()); 152 153 B.buildTrunc(MI.getOperand(0).getReg(), Med3); 154 155 MI.eraseFromParent(); 156 } 157 158 class AMDGPUPreLegalizerCombinerHelperState { 159 protected: 160 AMDGPUCombinerHelper &Helper; 161 AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper; 162 163 public: 164 AMDGPUPreLegalizerCombinerHelperState( 165 AMDGPUCombinerHelper &Helper, 166 AMDGPUPreLegalizerCombinerHelper &PreLegalizerHelper) 167 : Helper(Helper), PreLegalizerHelper(PreLegalizerHelper) {} 168 }; 169 170 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 171 #include "AMDGPUGenPreLegalizeGICombiner.inc" 172 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 173 174 namespace { 175 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 176 #include "AMDGPUGenPreLegalizeGICombiner.inc" 177 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 178 179 class AMDGPUPreLegalizerCombinerInfo final : public CombinerInfo { 180 GISelKnownBits *KB; 181 MachineDominatorTree *MDT; 182 183 public: 184 AMDGPUGenPreLegalizerCombinerHelperRuleConfig GeneratedRuleCfg; 185 186 AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, 187 GISelKnownBits *KB, MachineDominatorTree *MDT) 188 : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, 189 /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize), 190 KB(KB), MDT(MDT) { 191 if (!GeneratedRuleCfg.parseCommandLineOption()) 192 report_fatal_error("Invalid rule identifier"); 193 } 194 195 virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 196 MachineIRBuilder &B) const override; 197 }; 198 199 bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, 200 MachineInstr &MI, 201 MachineIRBuilder &B) const { 202 AMDGPUCombinerHelper Helper(Observer, B, KB, MDT); 203 AMDGPUPreLegalizerCombinerHelper PreLegalizerHelper(B, Helper); 204 AMDGPUGenPreLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper, 205 PreLegalizerHelper); 206 207 if (Generated.tryCombineAll(Observer, MI, B)) 208 return true; 209 210 switch (MI.getOpcode()) { 211 case TargetOpcode::G_CONCAT_VECTORS: 212 return Helper.tryCombineConcatVectors(MI); 213 case TargetOpcode::G_SHUFFLE_VECTOR: 214 return Helper.tryCombineShuffleVector(MI); 215 } 216 217 return false; 218 } 219 220 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 221 #include "AMDGPUGenPreLegalizeGICombiner.inc" 222 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 223 224 // Pass boilerplate 225 // ================ 226 227 class AMDGPUPreLegalizerCombiner : public MachineFunctionPass { 228 public: 229 static char ID; 230 231 AMDGPUPreLegalizerCombiner(bool IsOptNone = false); 232 233 StringRef getPassName() const override { 234 return "AMDGPUPreLegalizerCombiner"; 235 } 236 237 bool runOnMachineFunction(MachineFunction &MF) override; 238 239 void getAnalysisUsage(AnalysisUsage &AU) const override; 240 private: 241 bool IsOptNone; 242 }; 243 } // end anonymous namespace 244 245 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 246 AU.addRequired<TargetPassConfig>(); 247 AU.setPreservesCFG(); 248 getSelectionDAGFallbackAnalysisUsage(AU); 249 AU.addRequired<GISelKnownBitsAnalysis>(); 250 AU.addPreserved<GISelKnownBitsAnalysis>(); 251 if (!IsOptNone) { 252 AU.addRequired<MachineDominatorTree>(); 253 AU.addPreserved<MachineDominatorTree>(); 254 } 255 256 AU.addRequired<GISelCSEAnalysisWrapperPass>(); 257 AU.addPreserved<GISelCSEAnalysisWrapperPass>(); 258 MachineFunctionPass::getAnalysisUsage(AU); 259 } 260 261 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone) 262 : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 263 initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 264 } 265 266 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 267 if (MF.getProperties().hasProperty( 268 MachineFunctionProperties::Property::FailedISel)) 269 return false; 270 auto *TPC = &getAnalysis<TargetPassConfig>(); 271 const Function &F = MF.getFunction(); 272 bool EnableOpt = 273 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 274 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 275 MachineDominatorTree *MDT = 276 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); 277 AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), 278 F.hasMinSize(), KB, MDT); 279 // Enable CSE. 280 GISelCSEAnalysisWrapper &Wrapper = 281 getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); 282 auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig()); 283 284 Combiner C(PCInfo, TPC); 285 return C.combineMachineInstrs(MF, CSEInfo); 286 } 287 288 char AMDGPUPreLegalizerCombiner::ID = 0; 289 INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 290 "Combine AMDGPU machine instrs before legalization", 291 false, false) 292 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 293 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 294 INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 295 "Combine AMDGPU machine instrs before legalization", false, 296 false) 297 298 namespace llvm { 299 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { 300 return new AMDGPUPreLegalizerCombiner(IsOptNone); 301 } 302 } // end namespace llvm 303