1 //=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass does combining of machine instructions at the generic MI level, 10 // after the legalizer. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPUTargetMachine.h" 15 #include "AMDGPULegalizerInfo.h" 16 #include "llvm/CodeGen/GlobalISel/Combiner.h" 17 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 18 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 19 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 20 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 21 #include "llvm/CodeGen/MachineDominators.h" 22 #include "llvm/CodeGen/MachineFunctionPass.h" 23 #include "llvm/CodeGen/TargetPassConfig.h" 24 #include "llvm/Support/Debug.h" 25 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 26 27 #define DEBUG_TYPE "amdgpu-postlegalizer-combiner" 28 29 using namespace llvm; 30 using namespace MIPatternMatch; 31 32 struct FMinFMaxLegacyInfo { 33 Register LHS; 34 Register RHS; 35 Register True; 36 Register False; 37 CmpInst::Predicate Pred; 38 }; 39 40 // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize 41 static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI, 42 MachineFunction &MF, FMinFMaxLegacyInfo &Info) { 43 // FIXME: Combines should have subtarget predicates, and we shouldn't need 44 // this here. 45 if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy()) 46 return false; 47 48 // FIXME: Type predicate on pattern 49 if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32)) 50 return false; 51 52 Register Cond = MI.getOperand(1).getReg(); 53 if (!MRI.hasOneNonDBGUse(Cond) || 54 !mi_match(Cond, MRI, 55 m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS)))) 56 return false; 57 58 Info.True = MI.getOperand(2).getReg(); 59 Info.False = MI.getOperand(3).getReg(); 60 61 if (!(Info.LHS == Info.True && Info.RHS == Info.False) && 62 !(Info.LHS == Info.False && Info.RHS == Info.True)) 63 return false; 64 65 switch (Info.Pred) { 66 case CmpInst::FCMP_FALSE: 67 case CmpInst::FCMP_OEQ: 68 case CmpInst::FCMP_ONE: 69 case CmpInst::FCMP_ORD: 70 case CmpInst::FCMP_UNO: 71 case CmpInst::FCMP_UEQ: 72 case CmpInst::FCMP_UNE: 73 case CmpInst::FCMP_TRUE: 74 return false; 75 default: 76 return true; 77 } 78 } 79 80 static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI, 81 const FMinFMaxLegacyInfo &Info) { 82 83 auto buildNewInst = [&MI](unsigned Opc, Register X, Register Y) { 84 MachineIRBuilder MIB(MI); 85 MIB.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags()); 86 }; 87 88 switch (Info.Pred) { 89 case CmpInst::FCMP_ULT: 90 case CmpInst::FCMP_ULE: 91 if (Info.LHS == Info.True) 92 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); 93 else 94 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); 95 break; 96 case CmpInst::FCMP_OLE: 97 case CmpInst::FCMP_OLT: { 98 // We need to permute the operands to get the correct NaN behavior. The 99 // selected operand is the second one based on the failing compare with NaN, 100 // so permute it based on the compare type the hardware uses. 101 if (Info.LHS == Info.True) 102 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); 103 else 104 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); 105 break; 106 } 107 case CmpInst::FCMP_UGE: 108 case CmpInst::FCMP_UGT: { 109 if (Info.LHS == Info.True) 110 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); 111 else 112 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); 113 break; 114 } 115 case CmpInst::FCMP_OGT: 116 case CmpInst::FCMP_OGE: { 117 if (Info.LHS == Info.True) 118 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); 119 else 120 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); 121 break; 122 } 123 default: 124 llvm_unreachable("predicate should not have matched"); 125 } 126 127 MI.eraseFromParent(); 128 } 129 130 131 #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 132 #include "AMDGPUGenPostLegalizeGICombiner.inc" 133 #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 134 135 namespace { 136 #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 137 #include "AMDGPUGenPostLegalizeGICombiner.inc" 138 #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 139 140 class AMDGPUPostLegalizerCombinerInfo : public CombinerInfo { 141 GISelKnownBits *KB; 142 MachineDominatorTree *MDT; 143 144 public: 145 AMDGPUGenPostLegalizerCombinerHelper Generated; 146 147 AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, 148 const AMDGPULegalizerInfo *LI, 149 GISelKnownBits *KB, MachineDominatorTree *MDT) 150 : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true, 151 /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize), 152 KB(KB), MDT(MDT) { 153 if (!Generated.parseCommandLineOption()) 154 report_fatal_error("Invalid rule identifier"); 155 } 156 157 virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 158 MachineIRBuilder &B) const override; 159 }; 160 161 bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, 162 MachineInstr &MI, 163 MachineIRBuilder &B) const { 164 CombinerHelper Helper(Observer, B, KB, MDT); 165 166 if (Generated.tryCombineAll(Observer, MI, B, Helper)) 167 return true; 168 169 switch (MI.getOpcode()) { 170 case TargetOpcode::G_SHL: 171 case TargetOpcode::G_LSHR: 172 case TargetOpcode::G_ASHR: 173 // On some subtargets, 64-bit shift is a quarter rate instruction. In the 174 // common case, splitting this into a move and a 32-bit shift is faster and 175 // the same code size. 176 return Helper.tryCombineShiftToUnmerge(MI, 32); 177 } 178 179 return false; 180 } 181 182 #define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 183 #include "AMDGPUGenPostLegalizeGICombiner.inc" 184 #undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 185 186 // Pass boilerplate 187 // ================ 188 189 class AMDGPUPostLegalizerCombiner : public MachineFunctionPass { 190 public: 191 static char ID; 192 193 AMDGPUPostLegalizerCombiner(bool IsOptNone = false); 194 195 StringRef getPassName() const override { 196 return "AMDGPUPostLegalizerCombiner"; 197 } 198 199 bool runOnMachineFunction(MachineFunction &MF) override; 200 201 void getAnalysisUsage(AnalysisUsage &AU) const override; 202 private: 203 bool IsOptNone; 204 }; 205 } // end anonymous namespace 206 207 void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 208 AU.addRequired<TargetPassConfig>(); 209 AU.setPreservesCFG(); 210 getSelectionDAGFallbackAnalysisUsage(AU); 211 AU.addRequired<GISelKnownBitsAnalysis>(); 212 AU.addPreserved<GISelKnownBitsAnalysis>(); 213 if (!IsOptNone) { 214 AU.addRequired<MachineDominatorTree>(); 215 AU.addPreserved<MachineDominatorTree>(); 216 } 217 MachineFunctionPass::getAnalysisUsage(AU); 218 } 219 220 AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone) 221 : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 222 initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 223 } 224 225 bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 226 if (MF.getProperties().hasProperty( 227 MachineFunctionProperties::Property::FailedISel)) 228 return false; 229 auto *TPC = &getAnalysis<TargetPassConfig>(); 230 const Function &F = MF.getFunction(); 231 bool EnableOpt = 232 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 233 234 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 235 const AMDGPULegalizerInfo *LI 236 = static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo()); 237 238 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 239 MachineDominatorTree *MDT = 240 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); 241 AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), 242 F.hasMinSize(), LI, KB, MDT); 243 Combiner C(PCInfo, TPC); 244 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); 245 } 246 247 char AMDGPUPostLegalizerCombiner::ID = 0; 248 INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, 249 "Combine AMDGPU machine instrs after legalization", 250 false, false) 251 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 252 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 253 INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE, 254 "Combine AMDGPU machine instrs after legalization", false, 255 false) 256 257 namespace llvm { 258 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) { 259 return new AMDGPUPostLegalizerCombiner(IsOptNone); 260 } 261 } // end namespace llvm 262