//=== lib/CodeGen/GlobalISel/AMDGPUPostLegalizerCombiner.cpp ---------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass does combining of machine instructions at the generic MI level,
// after the legalizer.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPULegalizerInfo.h"
#include "AMDGPUTargetMachine.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/CodeGen/GlobalISel/Combiner.h"
#include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
#include "llvm/CodeGen/GlobalISel/CombinerInfo.h"
#include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
#include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cstdlib>
#include <limits>

#define DEBUG_TYPE "amdgpu-postlegalizer-combiner"

using namespace llvm;
using namespace MIPatternMatch;

class AMDGPUPostLegalizerCombinerHelper {
protected:
  MachineIRBuilder &B;
  MachineFunction &MF;
  MachineRegisterInfo &MRI;
  CombinerHelper &Helper;

public:
  AMDGPUPostLegalizerCombinerHelper(MachineIRBuilder &B, CombinerHelper &Helper)
      : B(B), MF(B.getMF()), MRI(*B.getMRI()), Helper(Helper) {}

  struct FMinFMaxLegacyInfo {
    Register LHS;
    Register RHS;
    Register True;
    Register False;
    CmpInst::Predicate Pred;
  };

  // TODO: Make sure fmin_legacy/fmax_legacy don't canonicalize.
  bool matchFMinFMaxLegacy(MachineInstr &MI, FMinFMaxLegacyInfo &Info);
  void applySelectFCmpToFMinToFMaxLegacy(MachineInstr &MI,
                                         const FMinFMaxLegacyInfo &Info);

  bool matchUCharToFloat(MachineInstr &MI);
  void applyUCharToFloat(MachineInstr &MI);

  // FIXME: Should be able to have 2 separate matchdatas rather than custom
  // struct boilerplate.
  struct CvtF32UByteMatchInfo {
    Register CvtVal;
    unsigned ShiftOffset;
  };

  bool matchCvtF32UByteN(MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo);
  void applyCvtF32UByteN(MachineInstr &MI,
                         const CvtF32UByteMatchInfo &MatchInfo);

  struct ClampI64ToI16MatchInfo {
    int64_t Cmp1;
    int64_t Cmp2;
    Register Origin;
  };

  bool matchClampI64ToI16(MachineInstr &MI, MachineRegisterInfo &MRI,
                          MachineFunction &MF,
                          ClampI64ToI16MatchInfo &MatchInfo);

  void applyClampI64ToI16(MachineInstr &MI,
                          const ClampI64ToI16MatchInfo &MatchInfo);
};

bool AMDGPUPostLegalizerCombinerHelper::matchFMinFMaxLegacy(
    MachineInstr &MI, FMinFMaxLegacyInfo &Info) {
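  // Illustrative shape of the pattern being matched (a sketch only; the
  // operand order may be swapped and the types are just an example):
  //   %cond:_(s1) = G_FCMP floatpred(olt), %lhs:_(s32), %rhs:_(s32)
  //   %dst:_(s32) = G_SELECT %cond(s1), %lhs, %rhs
  // which can later be selected to v_min_legacy_f32 / v_max_legacy_f32.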
  // FIXME: Combines should have subtarget predicates, and we shouldn't need
  // this here.
  if (!MF.getSubtarget<GCNSubtarget>().hasFminFmaxLegacy())
    return false;

  // FIXME: Type predicate on pattern
  if (MRI.getType(MI.getOperand(0).getReg()) != LLT::scalar(32))
    return false;

  Register Cond = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(Cond) ||
      !mi_match(Cond, MRI,
                m_GFCmp(m_Pred(Info.Pred), m_Reg(Info.LHS), m_Reg(Info.RHS))))
    return false;

  Info.True = MI.getOperand(2).getReg();
  Info.False = MI.getOperand(3).getReg();

  if (!(Info.LHS == Info.True && Info.RHS == Info.False) &&
      !(Info.LHS == Info.False && Info.RHS == Info.True))
    return false;

  switch (Info.Pred) {
  case CmpInst::FCMP_FALSE:
  case CmpInst::FCMP_OEQ:
  case CmpInst::FCMP_ONE:
  case CmpInst::FCMP_ORD:
  case CmpInst::FCMP_UNO:
  case CmpInst::FCMP_UEQ:
  case CmpInst::FCMP_UNE:
  case CmpInst::FCMP_TRUE:
    return false;
  default:
    return true;
  }
}

void AMDGPUPostLegalizerCombinerHelper::applySelectFCmpToFMinToFMaxLegacy(
    MachineInstr &MI, const FMinFMaxLegacyInfo &Info) {
  B.setInstrAndDebugLoc(MI);
  auto buildNewInst = [&MI, this](unsigned Opc, Register X, Register Y) {
    B.buildInstr(Opc, {MI.getOperand(0)}, {X, Y}, MI.getFlags());
  };

  switch (Info.Pred) {
  case CmpInst::FCMP_ULT:
  case CmpInst::FCMP_ULE:
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    break;
  case CmpInst::FCMP_OLE:
  case CmpInst::FCMP_OLT: {
    // We need to permute the operands to get the correct NaN behavior. The
    // selected operand is the second one based on the failing compare with
    // NaN, so permute it based on the compare type the hardware uses.
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  case CmpInst::FCMP_UGE:
  case CmpInst::FCMP_UGT: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.RHS, Info.LHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.LHS, Info.RHS);
    break;
  }
  case CmpInst::FCMP_OGT:
  case CmpInst::FCMP_OGE: {
    if (Info.LHS == Info.True)
      buildNewInst(AMDGPU::G_AMDGPU_FMAX_LEGACY, Info.LHS, Info.RHS);
    else
      buildNewInst(AMDGPU::G_AMDGPU_FMIN_LEGACY, Info.RHS, Info.LHS);
    break;
  }
  default:
    llvm_unreachable("predicate should not have matched");
  }

  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerHelper::matchUCharToFloat(MachineInstr &MI) {
  Register DstReg = MI.getOperand(0).getReg();

  // TODO: We could try to match extracting the higher bytes, which would be
  // easier if i8 vectors weren't promoted to i32 vectors, particularly after
  // types are legalized. v4i8 -> v4f32 is probably the only case to worry
  // about in practice.
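  // A sketch of the kind of input this combine fires on (constants shown
  // inline for brevity; the actual check below is on known bits, not on a
  // specific G_AND):
  //   %masked:_(s32) = G_AND %x:_(s32), 255
  //   %f:_(s32) = G_UITOFP %masked(s32)
  // The apply step rewrites the conversion to G_AMDGPU_CVT_F32_UBYTE0.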
  LLT Ty = MRI.getType(DstReg);
  if (Ty == LLT::scalar(32) || Ty == LLT::scalar(16)) {
    Register SrcReg = MI.getOperand(1).getReg();
    unsigned SrcSize = MRI.getType(SrcReg).getSizeInBits();
    assert(SrcSize == 16 || SrcSize == 32 || SrcSize == 64);
    const APInt Mask = APInt::getHighBitsSet(SrcSize, SrcSize - 8);
    return Helper.getKnownBits()->maskedValueIsZero(SrcReg, Mask);
  }

  return false;
}

void AMDGPUPostLegalizerCombinerHelper::applyUCharToFloat(MachineInstr &MI) {
  B.setInstrAndDebugLoc(MI);

  const LLT S32 = LLT::scalar(32);

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  LLT Ty = MRI.getType(DstReg);
  LLT SrcTy = MRI.getType(SrcReg);
  if (SrcTy != S32)
    SrcReg = B.buildAnyExtOrTrunc(S32, SrcReg).getReg(0);

  if (Ty == S32) {
    B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {DstReg}, {SrcReg},
                 MI.getFlags());
  } else {
    auto Cvt0 = B.buildInstr(AMDGPU::G_AMDGPU_CVT_F32_UBYTE0, {S32}, {SrcReg},
                             MI.getFlags());
    B.buildFPTrunc(DstReg, Cvt0, MI.getFlags());
  }

  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerHelper::matchCvtF32UByteN(
    MachineInstr &MI, CvtF32UByteMatchInfo &MatchInfo) {
  Register SrcReg = MI.getOperand(1).getReg();

  // Look through G_ZEXT.
  mi_match(SrcReg, MRI, m_GZExt(m_Reg(SrcReg)));

  Register Src0;
  int64_t ShiftAmt;
  bool IsShr = mi_match(SrcReg, MRI, m_GLShr(m_Reg(Src0), m_ICst(ShiftAmt)));
  if (IsShr || mi_match(SrcReg, MRI, m_GShl(m_Reg(Src0), m_ICst(ShiftAmt)))) {
    const unsigned Offset = MI.getOpcode() - AMDGPU::G_AMDGPU_CVT_F32_UBYTE0;

    unsigned ShiftOffset = 8 * Offset;
    if (IsShr)
      ShiftOffset += ShiftAmt;
    else
      ShiftOffset -= ShiftAmt;

    MatchInfo.CvtVal = Src0;
    MatchInfo.ShiftOffset = ShiftOffset;
    return ShiftOffset < 32 && ShiftOffset >= 8 && (ShiftOffset % 8) == 0;
  }

  // TODO: Simplify demanded bits.
  return false;
}

void AMDGPUPostLegalizerCombinerHelper::applyCvtF32UByteN(
    MachineInstr &MI, const CvtF32UByteMatchInfo &MatchInfo) {
  B.setInstrAndDebugLoc(MI);
  unsigned NewOpc = AMDGPU::G_AMDGPU_CVT_F32_UBYTE0 + MatchInfo.ShiftOffset / 8;

  const LLT S32 = LLT::scalar(32);
  Register CvtSrc = MatchInfo.CvtVal;
  LLT SrcTy = MRI.getType(MatchInfo.CvtVal);
  if (SrcTy != S32) {
    assert(SrcTy.isScalar() && SrcTy.getSizeInBits() >= 8);
    CvtSrc = B.buildAnyExt(S32, CvtSrc).getReg(0);
  }

  assert(MI.getOpcode() != NewOpc);
  B.buildInstr(NewOpc, {MI.getOperand(0)}, {CvtSrc}, MI.getFlags());
  MI.eraseFromParent();
}

bool AMDGPUPostLegalizerCombinerHelper::matchClampI64ToI16(
    MachineInstr &MI, MachineRegisterInfo &MRI, MachineFunction &MF,
    ClampI64ToI16MatchInfo &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!");
  const LLT SrcType = MRI.getType(MI.getOperand(1).getReg());

  // We want to check whether a 64-bit value gets clamped to the 16-bit range
  // (or a narrower range within it).
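  // A sketch of the sequence this is meant to catch (shown nested, with
  // constants inline and the selects in one possible order; the match below
  // accepts either order of the min/max boundaries):
  //   %lo:_(s64) = G_SELECT (G_ICMP sgt %x, -32768), %x, -32768
  //   %hi:_(s64) = G_SELECT (G_ICMP slt %lo, 32767), %lo, 32767
  //   %r:_(s16) = G_TRUNC %hi(s64)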
  if (SrcType != LLT::scalar(64))
    return false;

  LLVM_DEBUG(dbgs() << "Matching Clamp i64 to i16\n");

  CmpInst::Predicate Predicate1;
  Register Base;

  if (!mi_match(MI.getOperand(1).getReg(), MRI,
                m_GISelect(m_GICmp(m_Pred(Predicate1), m_Reg(), m_Reg()),
                           m_Reg(Base), m_ICst(MatchInfo.Cmp1))))
    return false;

  CmpInst::Predicate Predicate2;

  if (!mi_match(Base, MRI,
                m_GISelect(m_GICmp(m_Pred(Predicate2), m_Reg(), m_Reg()),
                           m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2))))
    return false;

  if ((Predicate1 == CmpInst::ICMP_SLT && Predicate2 == CmpInst::ICMP_SGT) ||
      (Predicate1 == CmpInst::ICMP_SGT && Predicate2 == CmpInst::ICMP_SLT)) {
    const auto Cmp1 = MatchInfo.Cmp1;
    const auto Cmp2 = MatchInfo.Cmp2;
    const auto Diff = std::abs(Cmp2 - Cmp1);

    // If the difference between the two boundaries is 0 or 1, there is
    // nothing to clamp.
    if (Diff == 0 || Diff == 1)
      return false;

    const int64_t Min = std::numeric_limits<int16_t>::min();
    const int64_t Max = std::numeric_limits<int16_t>::max();

    // Are we really trying to clamp against the 16-bit boundaries (or a
    // narrower range within them)?
    return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) ||
            (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min));
  }

  return false;
}

/// We want to find the combination of instructions that is generated when an
/// i64 is clamped to i16. The corresponding pattern is:
///   G_SELECT MIN/MAX, G_ICMP, G_SELECT MIN/MAX, G_ICMP, G_TRUNC.
/// This can be lowered efficiently as:
///   v_cvt_pk_i16_i32 v0, v0, v1
///   v_med3_i32 v0, Clamp_Min, v0, Clamp_Max
void AMDGPUPostLegalizerCombinerHelper::applyClampI64ToI16(
    MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) {
  LLVM_DEBUG(dbgs() << "Combining clamp i64 to i16\n");

  B.setInstrAndDebugLoc(MI);

  Register Src = MatchInfo.Origin;
  assert(MRI.getType(Src) == LLT::scalar(64));
  const LLT S32 = LLT::scalar(32);

  // G_UNMERGE_VALUES defines the low half first.
  auto Unmerge = B.buildUnmerge(S32, Src);
  Register Lo32 = Unmerge.getReg(0);
  Register Hi32 = Unmerge.getReg(1);
  MRI.setRegClass(Lo32, &AMDGPU::VGPR_32RegClass);
  MRI.setRegClass(Hi32, &AMDGPU::VGPR_32RegClass);

  constexpr unsigned CvtOpcode = AMDGPU::V_CVT_PK_I16_I32_e64;
  assert(MI.getOpcode() != CvtOpcode);

  const TargetRegisterClass *RC = &AMDGPU::VGPR_32RegClass;

  Register CvtDst = MRI.createVirtualRegister(RC);
  MRI.setType(CvtDst, S32);

  auto CvtPk = B.buildInstr(CvtOpcode);
  CvtPk.addDef(CvtDst);
  CvtPk.addReg(Lo32);
  CvtPk.addReg(Hi32);
  CvtPk.setMIFlags(MI.getFlags());

  const int64_t Min = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2);
  const int64_t Max = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2);

  Register MinBoundaryDst = MRI.createVirtualRegister(RC);
  MRI.setType(MinBoundaryDst, S32);
  B.buildConstant(MinBoundaryDst, Min);

  Register MaxBoundaryDst = MRI.createVirtualRegister(RC);
  MRI.setType(MaxBoundaryDst, S32);
  B.buildConstant(MaxBoundaryDst, Max);

  Register MedDst = MRI.createVirtualRegister(RC);
  MRI.setType(MedDst, S32);

  auto Med = B.buildInstr(AMDGPU::V_MED3_I32);
  Med.addDef(MedDst);
  Med.addReg(MinBoundaryDst);
  Med.addReg(CvtDst);
  Med.addReg(MaxBoundaryDst);
  Med.setMIFlags(MI.getFlags());

  B.buildCopy(MI.getOperand(0).getReg(), MedDst);

  MI.eraseFromParent();
}

class AMDGPUPostLegalizerCombinerHelperState {
protected:
  CombinerHelper &Helper;
  AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper;

public:
  AMDGPUPostLegalizerCombinerHelperState(
      CombinerHelper &Helper,
      AMDGPUPostLegalizerCombinerHelper &PostLegalizerHelper)
      : Helper(Helper), PostLegalizerHelper(PostLegalizerHelper) {}
};

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_DEPS

namespace {
#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_H

class AMDGPUPostLegalizerCombinerInfo final : public CombinerInfo {
  GISelKnownBits *KB;
  MachineDominatorTree *MDT;

public:
  AMDGPUGenPostLegalizerCombinerHelperRuleConfig GeneratedRuleCfg;

  AMDGPUPostLegalizerCombinerInfo(bool EnableOpt, bool OptSize, bool MinSize,
                                  const AMDGPULegalizerInfo *LI,
                                  GISelKnownBits *KB, MachineDominatorTree *MDT)
      : CombinerInfo(/*AllowIllegalOps*/ false, /*ShouldLegalizeIllegal*/ true,
                     /*LegalizerInfo*/ LI, EnableOpt, OptSize, MinSize),
        KB(KB), MDT(MDT) {
    if (!GeneratedRuleCfg.parseCommandLineOption())
      report_fatal_error("Invalid rule identifier");
  }

  bool combine(GISelChangeObserver &Observer, MachineInstr &MI,
               MachineIRBuilder &B) const override;
};

bool AMDGPUPostLegalizerCombinerInfo::combine(GISelChangeObserver &Observer,
                                              MachineInstr &MI,
                                              MachineIRBuilder &B) const {
  CombinerHelper Helper(Observer, B, KB, MDT, LInfo);
  AMDGPUPostLegalizerCombinerHelper PostLegalizerHelper(B, Helper);
  AMDGPUGenPostLegalizerCombinerHelper Generated(GeneratedRuleCfg, Helper,
                                                 PostLegalizerHelper);

  if (Generated.tryCombineAll(Observer, MI, B))
    return true;

  switch (MI.getOpcode()) {
  case TargetOpcode::G_SHL:
  case TargetOpcode::G_LSHR:
  case TargetOpcode::G_ASHR:
    // On some subtargets, 64-bit shift is a quarter rate instruction. In the
    // common case, splitting this into a move and a 32-bit shift is faster and
    // the same code size.
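    // For example (a sketch, constants shown inline for brevity), a 64-bit
    // shift by a constant of at least 32:
    //   %r:_(s64) = G_SHL %x:_(s64), 40
    // can be rewritten so that only a 32-bit shift remains:
    //   %lo:_(s32), %hi:_(s32) = G_UNMERGE_VALUES %x(s64)
    //   %newhi:_(s32) = G_SHL %lo, 8
    //   %r:_(s64) = G_MERGE_VALUES 0, %newhi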
    return Helper.tryCombineShiftToUnmerge(MI, 32);
  }

  return false;
}

#define AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP
#include "AMDGPUGenPostLegalizeGICombiner.inc"
#undef AMDGPUPOSTLEGALIZERCOMBINERHELPER_GENCOMBINERHELPER_CPP

// Pass boilerplate
// ================

class AMDGPUPostLegalizerCombiner : public MachineFunctionPass {
public:
  static char ID;

  AMDGPUPostLegalizerCombiner(bool IsOptNone = false);

  StringRef getPassName() const override {
    return "AMDGPUPostLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  bool IsOptNone;
};
} // end anonymous namespace

void AMDGPUPostLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetPassConfig>();
  AU.setPreservesCFG();
  getSelectionDAGFallbackAnalysisUsage(AU);
  AU.addRequired<GISelKnownBitsAnalysis>();
  AU.addPreserved<GISelKnownBitsAnalysis>();
  if (!IsOptNone) {
    AU.addRequired<MachineDominatorTree>();
    AU.addPreserved<MachineDominatorTree>();
  }
  MachineFunctionPass::getAnalysisUsage(AU);
}

AMDGPUPostLegalizerCombiner::AMDGPUPostLegalizerCombiner(bool IsOptNone)
    : MachineFunctionPass(ID), IsOptNone(IsOptNone) {
  initializeAMDGPUPostLegalizerCombinerPass(*PassRegistry::getPassRegistry());
}

bool AMDGPUPostLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) {
  if (MF.getProperties().hasProperty(
          MachineFunctionProperties::Property::FailedISel))
    return false;
  auto *TPC = &getAnalysis<TargetPassConfig>();
  const Function &F = MF.getFunction();
  bool EnableOpt =
      MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F);

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const AMDGPULegalizerInfo *LI =
      static_cast<const AMDGPULegalizerInfo *>(ST.getLegalizerInfo());

  GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF);
  MachineDominatorTree *MDT =
      IsOptNone ? nullptr : &getAnalysis<MachineDominatorTree>();
  AMDGPUPostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(),
                                         F.hasMinSize(), LI, KB, MDT);
  Combiner C(PCInfo, TPC);
  return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr);
}

char AMDGPUPostLegalizerCombiner::ID = 0;
INITIALIZE_PASS_BEGIN(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                      "Combine AMDGPU machine instrs after legalization", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis)
INITIALIZE_PASS_END(AMDGPUPostLegalizerCombiner, DEBUG_TYPE,
                    "Combine AMDGPU machine instrs after legalization", false,
                    false)

namespace llvm {
FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone) {
  return new AMDGPUPostLegalizerCombiner(IsOptNone);
}
} // end namespace llvm