1 //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass does combining of machine instructions at the generic MI level, 10 // before the legalizer. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPUTargetMachine.h" 15 #include "llvm/CodeGen/GlobalISel/Combiner.h" 16 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" 17 #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" 18 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" 19 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" 20 #include "llvm/CodeGen/MachineDominators.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/TargetPassConfig.h" 23 #include "llvm/Support/Debug.h" 24 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 25 26 #define DEBUG_TYPE "amdgpu-prelegalizer-combiner" 27 28 using namespace llvm; 29 using namespace MIPatternMatch; 30 31 struct FMinFMaxLegacyInfo { 32 Register LHS; 33 Register RHS; 34 Register True; 35 Register False; 36 CmpInst::Predicate Pred; 37 }; 38 39 // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize 40 static bool matchFMinFMaxLegacy(MachineInstr &MI, MachineRegisterInfo &MRI, 41 MachineFunction &MF, FMinFMaxLegacyInfo &Info) { 42 // FIXME: Combines should have subtarget predicates, and we shouldn't need 43 // this here. 44 if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy()) 45 return false; 46 47 // FIXME: Type predicate on pattern 48 if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32)) 49 return false; 50 51 Register Cond = MI.getOperand(1).getReg(); 52 if (!MRI.hasOneNonDBGUse(Cond) || 53 !mi_match(Cond, MRI, 54 m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS)))) 55 return false; 56 57 Info.True = MI.getOperand(2).getReg(); 58 Info.False = MI.getOperand(3).getReg(); 59 60 if (!(Info.LHS == Info.True && Info.RHS == Info.False) && 61 !(Info.LHS == Info.False && Info.RHS == Info.True)) 62 return false; 63 64 switch (Info.Pred) { 65 case CmpInst::FCMP_FALSE: 66 case CmpInst::FCMP_OEQ: 67 case CmpInst::FCMP_ONE: 68 case CmpInst::FCMP_ORD: 69 case CmpInst::FCMP_UNO: 70 case CmpInst::FCMP_UEQ: 71 case CmpInst::FCMP_UNE: 72 case CmpInst::FCMP_TRUE: 73 return false; 74 default: 75 return true; 76 } 77 } 78 79 static void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI, 80 const FMinFMaxLegacyInfo &Info) { 81 82 auto buildNewInst = [&MI](unsigned Opc, Register X, Register Y) { 83 MachineIRBuilder MIB(MI); 84 MIB.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags()); 85 }; 86 87 switch (Info.Pred) { 88 case CmpInst::FCMP_ULT: 89 case CmpInst::FCMP_ULE: 90 if (Info.LHS == Info.True) 91 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); 92 else 93 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); 94 break; 95 case CmpInst::FCMP_OLE: 96 case CmpInst::FCMP_OLT: { 97 // We need to permute the operands to get the correct NaN behavior. The 98 // selected operand is the second one based on the failing compare with NaN, 99 // so permute it based on the compare type the hardware uses. 100 if (Info.LHS == Info.True) 101 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); 102 else 103 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); 104 break; 105 } 106 case CmpInst::FCMP_UGE: 107 case CmpInst::FCMP_UGT: { 108 if (Info.LHS == Info.True) 109 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS); 110 else 111 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS); 112 break; 113 } 114 case CmpInst::FCMP_OGT: 115 case CmpInst::FCMP_OGE: { 116 if (Info.LHS == Info.True) 117 buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS); 118 else 119 buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS); 120 break; 121 } 122 default: 123 llvm_unreachable("predicate should not have matched"); 124 } 125 126 MI.eraseFromParent(); 127 } 128 129 130 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 131 #include "AMDGPUGenGICombiner.inc" 132 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS 133 134 namespace { 135 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 136 #include "AMDGPUGenGICombiner.inc" 137 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H 138 139 class AMDGPUPreLegalizerCombinerInfo : public CombinerInfo { 140 GISelKnownBits *KB; 141 MachineDominatorTree *MDT; 142 143 public: 144 AMDGPUGenPreLegalizerCombinerHelper Generated; 145 146 AMDGPUPreLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize, 147 GISelKnownBits *KB, MachineDominatorTree *MDT) 148 : CombinerInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, 149 /*LegalizerInfo*/ nullptr, EnableOpt, OptSize, MinSize), 150 KB(KB), MDT(MDT) { 151 if (!Generated.parseCommandLineOption()) 152 report_fatal_error("Invalid rule identifier"); 153 } 154 155 virtual bool combine(GISelChangeObserver &Observer, MachineInstr &MI, 156 MachineIRBuilder &B) const override; 157 }; 158 159 bool AMDGPUPreLegalizerCombinerInfo::combine(GISelChangeObserver &Observer, 160 MachineInstr &MI, 161 MachineIRBuilder &B) const { 162 CombinerHelper Helper(Observer, B, KB, MDT); 163 164 if (Generated.tryCombineAll(Observer, MI, B, Helper)) 165 return true; 166 167 switch (MI.getOpcode()) { 168 case TargetOpcode::G_SHL: 169 case TargetOpcode::G_LSHR: 170 case TargetOpcode::G_ASHR: 171 // On some subtargets, 64-bit shift is a quarter rate instruction. In the 172 // common case, splitting this into a move and a 32-bit shift is faster and 173 // the same code size. 174 return Helper.tryCombineShiftToUnmerge(MI, 32); 175 case TargetOpcode::G_CONCAT_VECTORS: 176 return Helper.tryCombineConcatVectors(MI); 177 case TargetOpcode::G_SHUFFLE_VECTOR: 178 return Helper.tryCombineShuffleVector(MI); 179 } 180 181 return false; 182 } 183 184 #define AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 185 #include "AMDGPUGenGICombiner.inc" 186 #undef AMDGPUPRELEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP 187 188 // Pass boilerplate 189 // ================ 190 191 class AMDGPUPreLegalizerCombiner : public MachineFunctionPass { 192 public: 193 static char ID; 194 195 AMDGPUPreLegalizerCombiner(bool IsOptNone = false); 196 197 StringRef getPassName() const override { return "AMDGPUPreLegalizerCombiner"; } 198 199 bool runOnMachineFunction(MachineFunction &MF) override; 200 201 void getAnalysisUsage(AnalysisUsage &AU) const override; 202 private: 203 bool IsOptNone; 204 }; 205 } // end anonymous namespace 206 207 void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { 208 AU.addRequired<TargetPassConfig>(); 209 AU.setPreservesCFG(); 210 getSelectionDAGFallbackAnalysisUsage(AU); 211 AU.addRequired<GISelKnownBitsAnalysis>(); 212 AU.addPreserved<GISelKnownBitsAnalysis>(); 213 if (!IsOptNone) { 214 AU.addRequired<MachineDominatorTree>(); 215 AU.addPreserved<MachineDominatorTree>(); 216 } 217 MachineFunctionPass::getAnalysisUsage(AU); 218 } 219 220 AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone) 221 : MachineFunctionPass(ID), IsOptNone(IsOptNone) { 222 initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); 223 } 224 225 bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { 226 if (MF.getProperties().hasProperty( 227 MachineFunctionProperties::Property::FailedISel)) 228 return false; 229 auto *TPC = &getAnalysis<TargetPassConfig>(); 230 const Function &F = MF.getFunction(); 231 bool EnableOpt = 232 MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); 233 GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); 234 MachineDominatorTree *MDT = 235 IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>(); 236 AMDGPUPreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), 237 F.hasMinSize(), KB, MDT); 238 Combiner C(PCInfo, TPC); 239 return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); 240 } 241 242 char AMDGPUPreLegalizerCombiner::ID = 0; 243 INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 244 "Combine AMDGPU machine instrs before legalization", 245 false, false) 246 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 247 INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) 248 INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, 249 "Combine AMDGPU machine instrs before legalization", false, 250 false) 251 252 namespace llvm { 253 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { 254 return new AMDGPUPreLegalizerCombiner(IsOptNone); 255 } 256 } // end namespace llvm 257