//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

class AMDGPUPostLegalizerCombinerHelper {
protected:
  MachineIRBuilder &B;
  MachineFunction &MF;
  MachineRegisterInfo &MRI;
  CombinerHelper &Helper;

public:
  AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
      : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper) {}

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    Register True;
    Register False;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                         const FMinFMaxLegacyInfo &Info);

  bool matchUCharToFloat(MachineInstr &MI);
  void applyUCharToFloat(MachineInstr &MI);

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo);

  struct ClampI64ToI16MatchInfo {
    int64_t Cmp1;
    int64_t Cmp2;
    Register Origin;
  };

  bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineFunction &MF,
                          ClampI64ToI16MatchInfo &MatchInfo);

  void applyClampI64ToI16(MachineInstr &MI,
                          const ClampI64ToI16MatchInfo &MatchInfo);
};

bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
  // FIXME: Combines should have subtarget predicates, and we shouldn't need
  // this here.
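  // Only subtargets that still provide the legacy fmin/fmax instructions can
  // use this combine; bail out everywhere else.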
  if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
    return false;

  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
  B.setInstrAndDebugLoc(MI);
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with
    // NaN, so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
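  // The conversion only fires when every bit above the low byte of the source
  // is known to be zero, i.e. the source already behaves like an unsigned
  // char.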
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
  B.setInstrAndDebugLoc(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerHelper::matchClampI64ToI16(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF,
    ClampI64ToI16MatchInfo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
  const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());

  // We want to check if a 64-bit number gets clamped to 16-bit boundaries (or
  // below).
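  // Roughly, the MIR being matched looks like this (the min and max selects
  // may come in either order, so the constants may swap roles):
  //   %icmp1:_(s1)  = G_ICMP intpred(sgt/slt), ..., ...
  //   %inner:_(s64) = G_SELECT %icmp1, %Origin, %Cmp2
  //   %icmp2:_(s1)  = G_ICMP intpred(slt/sgt), ..., ...
  //   %outer:_(s64) = G_SELECT %icmp2, %inner, %Cmp1
  //   %dst          = G_TRUNC %outer(s64)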
  if (SrcType != LLT::scalar(64))
    return false;

  MachineIRBuilder B(MI);

  LLVM_DEBUG(dbgs() << "Matching Clamp i64 to i16\n");

  CmpInst::Predicate Predicate1;
  Register Base;

  if (!mi_match(MI.getOperand(1).getReg(), MRI,
                m_GISelect(m_GICmp(m_Pred(Predicate1), m_Reg(), m_Reg()),
                           m_Reg(Base), m_ICst(MatchInfo.Cmp1))))
    return false;

  CmpInst::Predicate Predicate2;

  if (!mi_match(Base, MRI,
                m_GISelect(m_GICmp(m_Pred(Predicate2), m_Reg(), m_Reg()),
                           m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2))))
    return false;

  if ((Predicate1 == CmpInst::ICMP_SLT && Predicate2 == CmpInst::ICMP_SGT) ||
      (Predicate1 == CmpInst::ICMP_SGT && Predicate2 == CmpInst::ICMP_SLT)) {
    const auto Cmp1 = MatchInfo.Cmp1;
    const auto Cmp2 = MatchInfo.Cmp2;
    const auto Diff = std::abs(Cmp2 - Cmp1);

    // We don't need to clamp here.
    if (Diff == 0 || Diff == 1)
      return false;

    const int64_t Min = std::numeric_limits<int16_t>::min();
    const int64_t Max = std::numeric_limits<int16_t>::max();

    // Are we really trying to clamp against the relevant boundaries?
    return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
            (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
  }

  return false;
}

// We want to find the combination of instructions that is generated when an
// i64 gets clamped to i16. The corresponding pattern is:
//   G_SELECT MIN/MAX, G_ICMP, G_SELECT MIN/MAX, G_ICMP, G_TRUNC
// This can be efficiently written as:
//   v_cvt_pk_i16_i32 v0, v0, v1
//   v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
void AMDGPUPostLegalizerCombinerHelper::applyClampI64ToI16(
    MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) {
  LLVM_DEBUG(dbgs() << "Combining MI\n");

  MachineIRBuilder B(MI);
  MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();

  Register Src = MatchInfo.Origin;
  assert(MRI.getType(Src) == LLT::scalar(64));
  const LLT S32 = LLT::scalar(32);

  auto Unmerge = B.buildUnmerge(S32, Src);
  Register Hi32 = Unmerge->getOperand(0).getReg();
  Register Lo32 = Unmerge->getOperand(1).getReg();
  MRI.setRegClass(Hi32, &AMDGPU::VGPR_32RegClass);
  MRI.setRegClass(Lo32, &AMDGPU::VGPR_32RegClass);

  constexpr unsigned CvtOpcode = AMDGPU::V_CVT_PK_I16_I32_e64;
  assert(MI.getOpcode() != CvtOpcode);

  const auto *RegClass = &AMDGPU::VGPR_32RegClass;

  Register CvtDst = MRI.createVirtualRegister(RegClass);
  MRI.setType(CvtDst, S32);

  auto CvtPk = B.buildInstr(CvtOpcode);
  CvtPk.addDef(CvtDst);
  CvtPk.addReg(Hi32);
  CvtPk.addReg(Lo32);
  CvtPk.setMIFlags(MI.getFlags());

  const auto Min = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
  const auto Max = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);

  Register MinBoundaryDst = MRI.createVirtualRegister(RegClass);
  MRI.setType(MinBoundaryDst, S32);
  B.buildConstant(MinBoundaryDst, Min);

  Register MaxBoundaryDst = MRI.createVirtualRegister(RegClass);
  MRI.setType(MaxBoundaryDst, S32);
  B.buildConstant(MaxBoundaryDst, Max);

  Register MedDst = MRI.createVirtualRegister(RegClass);
  MRI.setType(MedDst, S32);

  auto Med = B.buildInstr(AMDGPU::V_MED3_I32);
  Med.addDef(MedDst);
  Med.addReg(MinBoundaryDst);
  Med.addReg(CvtDst);
  Med.addReg(MaxBoundaryDst);
  Med.setMIFlags(MI.getFlags());

  B.buildCopy(MI.getOperand(0).getReg(), MedDst);

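  // The original G_TRUNC result is now produced by the COPY of the med3
  // result, so the trunc itself is dead and can be removed.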
  MI.eraseFromParent();
}

class AMDGPUPostLegalizerCombinerHelperState {
protected:
  CombinerHelper &Helper;
  AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;

public:
  AMDGPUPostLegalizerCombinerHelperState(
      CombinerHelper &Helper,
      AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
      : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
};

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;

public:
  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

  AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  const AMDGPULegalizerInfo *LI,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};

bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
  AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
  AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
                                                 PostLegalizerHelper);

  if (Generated.tryCombineAll(Observer, MI, B))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
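    // tryCombineShiftToUnmerge only rewrites shifts whose constant amount is
    // large enough (at least the narrow width, 32, for this call) that the
    // result can be rebuilt from the unmerged 32-bit halves.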
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), LI, KB, MDT);
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm