1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief The AMDGPU target machine contains all of the hardware specific 12 /// information needed to emit code for R600 and SI GPUs. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPUTargetMachine.h" 17 #include "AMDGPU.h" 18 #include "AMDGPUCallLowering.h" 19 #include "AMDGPUTargetObjectFile.h" 20 #include "AMDGPUTargetTransformInfo.h" 21 #include "GCNSchedStrategy.h" 22 #include "R600ISelLowering.h" 23 #include "R600InstrInfo.h" 24 #include "R600MachineScheduler.h" 25 #include "SIISelLowering.h" 26 #include "SIInstrInfo.h" 27 #include "SIMachineScheduler.h" 28 #include "llvm/CodeGen/GlobalISel/IRTranslator.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/CodeGen/TargetPassConfig.h" 31 #include "llvm/Support/TargetRegistry.h" 32 #include "llvm/Transforms/IPO.h" 33 #include "llvm/Transforms/IPO/AlwaysInliner.h" 34 #include "llvm/Transforms/Scalar.h" 35 #include "llvm/Transforms/Scalar/GVN.h" 36 #include "llvm/Transforms/Vectorize.h" 37 38 using namespace llvm; 39 40 static cl::opt<bool> EnableR600StructurizeCFG( 41 "r600-ir-structurize", 42 cl::desc("Use StructurizeCFG IR pass"), 43 cl::init(true)); 44 45 static cl::opt<bool> EnableSROA( 46 "amdgpu-sroa", 47 cl::desc("Run SROA after promote alloca pass"), 48 cl::ReallyHidden, 49 cl::init(true)); 50 51 static cl::opt<bool> EnableR600IfConvert( 52 "r600-if-convert", 53 cl::desc("Use if conversion pass"), 54 cl::ReallyHidden, 55 cl::init(true)); 56 57 // Option to disable vectorizer for tests. 58 static cl::opt<bool> EnableLoadStoreVectorizer( 59 "amdgpu-load-store-vectorizer", 60 cl::desc("Enable load store vectorizer"), 61 cl::init(true), 62 cl::Hidden); 63 64 extern "C" void LLVMInitializeAMDGPUTarget() { 65 // Register the target 66 RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget()); 67 RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget()); 68 69 PassRegistry *PR = PassRegistry::getPassRegistry(); 70 initializeSILowerI1CopiesPass(*PR); 71 initializeSIFixSGPRCopiesPass(*PR); 72 initializeSIFoldOperandsPass(*PR); 73 initializeSIShrinkInstructionsPass(*PR); 74 initializeSIFixControlFlowLiveIntervalsPass(*PR); 75 initializeSILoadStoreOptimizerPass(*PR); 76 initializeAMDGPUAnnotateKernelFeaturesPass(*PR); 77 initializeAMDGPUAnnotateUniformValuesPass(*PR); 78 initializeAMDGPUPromoteAllocaPass(*PR); 79 initializeAMDGPUCodeGenPreparePass(*PR); 80 initializeSIAnnotateControlFlowPass(*PR); 81 initializeSIInsertWaitsPass(*PR); 82 initializeSIWholeQuadModePass(*PR); 83 initializeSILowerControlFlowPass(*PR); 84 initializeSIInsertSkipsPass(*PR); 85 initializeSIDebuggerInsertNopsPass(*PR); 86 initializeSIOptimizeExecMaskingPass(*PR); 87 } 88 89 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { 90 return make_unique<AMDGPUTargetObjectFile>(); 91 } 92 93 static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { 94 return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>()); 95 } 96 97 static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) { 98 return new SIScheduleDAGMI(C); 99 } 100 101 static ScheduleDAGInstrs * 102 createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { 103 ScheduleDAGMILive *DAG = 104 new ScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C)); 105 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); 106 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); 107 return DAG; 108 } 109 110 static MachineSchedRegistry 111 R600SchedRegistry("r600", "Run R600's custom scheduler", 112 createR600MachineScheduler); 113 114 static MachineSchedRegistry 115 SISchedRegistry("si", "Run SI's custom scheduler", 116 createSIMachineScheduler); 117 118 static MachineSchedRegistry 119 GCNMaxOccupancySchedRegistry("gcn-max-occupancy", 120 "Run GCN scheduler to maximize occupancy", 121 createGCNMaxOccupancyMachineScheduler); 122 123 static StringRef computeDataLayout(const Triple &TT) { 124 if (TT.getArch() == Triple::r600) { 125 // 32-bit pointers. 126 return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 127 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; 128 } 129 130 // 32-bit private, local, and region pointers. 64-bit global, constant and 131 // flat. 132 return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" 133 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 134 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; 135 } 136 137 LLVM_READNONE 138 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) { 139 if (!GPU.empty()) 140 return GPU; 141 142 // HSA only supports CI+, so change the default GPU to a CI for HSA. 143 if (TT.getArch() == Triple::amdgcn) 144 return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti"; 145 146 return "r600"; 147 } 148 149 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { 150 // The AMDGPU toolchain only supports generating shared objects, so we 151 // must always use PIC. 152 return Reloc::PIC_; 153 } 154 155 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, 156 StringRef CPU, StringRef FS, 157 TargetOptions Options, 158 Optional<Reloc::Model> RM, 159 CodeModel::Model CM, 160 CodeGenOpt::Level OptLevel) 161 : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), 162 FS, Options, getEffectiveRelocModel(RM), CM, OptLevel), 163 TLOF(createTLOF(getTargetTriple())), 164 IntrinsicInfo() { 165 setRequiresStructuredCFG(true); 166 initAsmInfo(); 167 } 168 169 AMDGPUTargetMachine::~AMDGPUTargetMachine() { } 170 171 StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const { 172 Attribute GPUAttr = F.getFnAttribute("target-cpu"); 173 return GPUAttr.hasAttribute(Attribute::None) ? 174 getTargetCPU() : GPUAttr.getValueAsString(); 175 } 176 177 StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const { 178 Attribute FSAttr = F.getFnAttribute("target-features"); 179 180 return FSAttr.hasAttribute(Attribute::None) ? 181 getTargetFeatureString() : 182 FSAttr.getValueAsString(); 183 } 184 185 //===----------------------------------------------------------------------===// 186 // R600 Target Machine (R600 -> Cayman) 187 //===----------------------------------------------------------------------===// 188 189 R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT, 190 StringRef CPU, StringRef FS, 191 TargetOptions Options, 192 Optional<Reloc::Model> RM, 193 CodeModel::Model CM, CodeGenOpt::Level OL) 194 : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} 195 196 const R600Subtarget *R600TargetMachine::getSubtargetImpl( 197 const Function &F) const { 198 StringRef GPU = getGPUName(F); 199 StringRef FS = getFeatureString(F); 200 201 SmallString<128> SubtargetKey(GPU); 202 SubtargetKey.append(FS); 203 204 auto &I = SubtargetMap[SubtargetKey]; 205 if (!I) { 206 // This needs to be done before we create a new subtarget since any 207 // creation will depend on the TM and the code generation flags on the 208 // function that reside in TargetOptions. 209 resetTargetOptions(F); 210 I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this); 211 } 212 213 return I.get(); 214 } 215 216 //===----------------------------------------------------------------------===// 217 // GCN Target Machine (SI+) 218 //===----------------------------------------------------------------------===// 219 220 #ifdef LLVM_BUILD_GLOBAL_ISEL 221 namespace { 222 struct SIGISelActualAccessor : public GISelAccessor { 223 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; 224 const AMDGPUCallLowering *getCallLowering() const override { 225 return CallLoweringInfo.get(); 226 } 227 }; 228 } // End anonymous namespace. 229 #endif 230 231 GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT, 232 StringRef CPU, StringRef FS, 233 TargetOptions Options, 234 Optional<Reloc::Model> RM, 235 CodeModel::Model CM, CodeGenOpt::Level OL) 236 : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} 237 238 const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const { 239 StringRef GPU = getGPUName(F); 240 StringRef FS = getFeatureString(F); 241 242 SmallString<128> SubtargetKey(GPU); 243 SubtargetKey.append(FS); 244 245 auto &I = SubtargetMap[SubtargetKey]; 246 if (!I) { 247 // This needs to be done before we create a new subtarget since any 248 // creation will depend on the TM and the code generation flags on the 249 // function that reside in TargetOptions. 250 resetTargetOptions(F); 251 I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this); 252 253 #ifndef LLVM_BUILD_GLOBAL_ISEL 254 GISelAccessor *GISel = new GISelAccessor(); 255 #else 256 SIGISelActualAccessor *GISel = new SIGISelActualAccessor(); 257 GISel->CallLoweringInfo.reset( 258 new AMDGPUCallLowering(*I->getTargetLowering())); 259 #endif 260 261 I->setGISelAccessor(*GISel); 262 } 263 264 return I.get(); 265 } 266 267 //===----------------------------------------------------------------------===// 268 // AMDGPU Pass Setup 269 //===----------------------------------------------------------------------===// 270 271 namespace { 272 273 class AMDGPUPassConfig : public TargetPassConfig { 274 public: 275 AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM) 276 : TargetPassConfig(TM, PM) { 277 278 // Exceptions and StackMaps are not supported, so these passes will never do 279 // anything. 280 disablePass(&StackMapLivenessID); 281 disablePass(&FuncletLayoutID); 282 } 283 284 AMDGPUTargetMachine &getAMDGPUTargetMachine() const { 285 return getTM<AMDGPUTargetMachine>(); 286 } 287 288 ScheduleDAGInstrs * 289 createMachineScheduler(MachineSchedContext *C) const override { 290 ScheduleDAGMILive *DAG = createGenericSchedLive(C); 291 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); 292 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); 293 return DAG; 294 } 295 296 void addEarlyCSEOrGVNPass(); 297 void addStraightLineScalarOptimizationPasses(); 298 void addIRPasses() override; 299 void addCodeGenPrepare() override; 300 bool addPreISel() override; 301 bool addInstSelector() override; 302 bool addGCPasses() override; 303 }; 304 305 class R600PassConfig final : public AMDGPUPassConfig { 306 public: 307 R600PassConfig(TargetMachine *TM, PassManagerBase &PM) 308 : AMDGPUPassConfig(TM, PM) { } 309 310 ScheduleDAGInstrs *createMachineScheduler( 311 MachineSchedContext *C) const override { 312 return createR600MachineScheduler(C); 313 } 314 315 bool addPreISel() override; 316 void addPreRegAlloc() override; 317 void addPreSched2() override; 318 void addPreEmitPass() override; 319 }; 320 321 class GCNPassConfig final : public AMDGPUPassConfig { 322 public: 323 GCNPassConfig(TargetMachine *TM, PassManagerBase &PM) 324 : AMDGPUPassConfig(TM, PM) { } 325 326 GCNTargetMachine &getGCNTargetMachine() const { 327 return getTM<GCNTargetMachine>(); 328 } 329 330 ScheduleDAGInstrs * 331 createMachineScheduler(MachineSchedContext *C) const override; 332 333 void addIRPasses() override; 334 bool addPreISel() override; 335 void addMachineSSAOptimization() override; 336 bool addInstSelector() override; 337 #ifdef LLVM_BUILD_GLOBAL_ISEL 338 bool addIRTranslator() override; 339 bool addLegalizeMachineIR() override; 340 bool addRegBankSelect() override; 341 bool addGlobalInstructionSelect() override; 342 #endif 343 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 344 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 345 void addPreRegAlloc() override; 346 void addPostRegAlloc() override; 347 void addPreSched2() override; 348 void addPreEmitPass() override; 349 }; 350 351 } // End of anonymous namespace 352 353 TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() { 354 return TargetIRAnalysis([this](const Function &F) { 355 return TargetTransformInfo(AMDGPUTTIImpl(this, F)); 356 }); 357 } 358 359 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() { 360 if (getOptLevel() == CodeGenOpt::Aggressive) 361 addPass(createGVNPass()); 362 else 363 addPass(createEarlyCSEPass()); 364 } 365 366 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() { 367 addPass(createSeparateConstOffsetFromGEPPass()); 368 addPass(createSpeculativeExecutionPass()); 369 // ReassociateGEPs exposes more opportunites for SLSR. See 370 // the example in reassociate-geps-and-slsr.ll. 371 addPass(createStraightLineStrengthReducePass()); 372 // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or 373 // EarlyCSE can reuse. 374 addEarlyCSEOrGVNPass(); 375 // Run NaryReassociate after EarlyCSE/GVN to be more effective. 376 addPass(createNaryReassociatePass()); 377 // NaryReassociate on GEPs creates redundant common expressions, so run 378 // EarlyCSE after it. 379 addPass(createEarlyCSEPass()); 380 } 381 382 void AMDGPUPassConfig::addIRPasses() { 383 // There is no reason to run these. 384 disablePass(&StackMapLivenessID); 385 disablePass(&FuncletLayoutID); 386 disablePass(&PatchableFunctionID); 387 388 // Function calls are not supported, so make sure we inline everything. 389 addPass(createAMDGPUAlwaysInlinePass()); 390 addPass(createAlwaysInlinerLegacyPass()); 391 // We need to add the barrier noop pass, otherwise adding the function 392 // inlining pass will cause all of the PassConfigs passes to be run 393 // one function at a time, which means if we have a nodule with two 394 // functions, then we will generate code for the first function 395 // without ever running any passes on the second. 396 addPass(createBarrierNoopPass()); 397 398 // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. 399 addPass(createAMDGPUOpenCLImageTypeLoweringPass()); 400 401 const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine(); 402 if (TM.getOptLevel() > CodeGenOpt::None) { 403 addPass(createAMDGPUPromoteAlloca(&TM)); 404 405 if (EnableSROA) 406 addPass(createSROAPass()); 407 408 addStraightLineScalarOptimizationPasses(); 409 } 410 411 TargetPassConfig::addIRPasses(); 412 413 // EarlyCSE is not always strong enough to clean up what LSR produces. For 414 // example, GVN can combine 415 // 416 // %0 = add %a, %b 417 // %1 = add %b, %a 418 // 419 // and 420 // 421 // %0 = shl nsw %a, 2 422 // %1 = shl %a, 2 423 // 424 // but EarlyCSE can do neither of them. 425 if (getOptLevel() != CodeGenOpt::None) 426 addEarlyCSEOrGVNPass(); 427 } 428 429 void AMDGPUPassConfig::addCodeGenPrepare() { 430 TargetPassConfig::addCodeGenPrepare(); 431 432 if (EnableLoadStoreVectorizer) 433 addPass(createLoadStoreVectorizerPass()); 434 } 435 436 bool AMDGPUPassConfig::addPreISel() { 437 addPass(createFlattenCFGPass()); 438 return false; 439 } 440 441 bool AMDGPUPassConfig::addInstSelector() { 442 addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel())); 443 return false; 444 } 445 446 bool AMDGPUPassConfig::addGCPasses() { 447 // Do nothing. GC is not supported. 448 return false; 449 } 450 451 //===----------------------------------------------------------------------===// 452 // R600 Pass Setup 453 //===----------------------------------------------------------------------===// 454 455 bool R600PassConfig::addPreISel() { 456 AMDGPUPassConfig::addPreISel(); 457 458 if (EnableR600StructurizeCFG) 459 addPass(createStructurizeCFGPass()); 460 return false; 461 } 462 463 void R600PassConfig::addPreRegAlloc() { 464 addPass(createR600VectorRegMerger(*TM)); 465 } 466 467 void R600PassConfig::addPreSched2() { 468 addPass(createR600EmitClauseMarkers(), false); 469 if (EnableR600IfConvert) 470 addPass(&IfConverterID, false); 471 addPass(createR600ClauseMergePass(*TM), false); 472 } 473 474 void R600PassConfig::addPreEmitPass() { 475 addPass(createAMDGPUCFGStructurizerPass(), false); 476 addPass(createR600ExpandSpecialInstrsPass(*TM), false); 477 addPass(&FinalizeMachineBundlesID, false); 478 addPass(createR600Packetizer(*TM), false); 479 addPass(createR600ControlFlowFinalizer(*TM), false); 480 } 481 482 TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { 483 return new R600PassConfig(this, PM); 484 } 485 486 //===----------------------------------------------------------------------===// 487 // GCN Pass Setup 488 //===----------------------------------------------------------------------===// 489 490 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler( 491 MachineSchedContext *C) const { 492 const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>(); 493 if (ST.enableSIScheduler()) 494 return createSIMachineScheduler(C); 495 return createGCNMaxOccupancyMachineScheduler(C); 496 } 497 498 bool GCNPassConfig::addPreISel() { 499 AMDGPUPassConfig::addPreISel(); 500 501 // FIXME: We need to run a pass to propagate the attributes when calls are 502 // supported. 503 addPass(&AMDGPUAnnotateKernelFeaturesID); 504 addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions 505 addPass(createSinkingPass()); 506 addPass(createSITypeRewriter()); 507 addPass(createAMDGPUAnnotateUniformValues()); 508 addPass(createSIAnnotateControlFlowPass()); 509 510 return false; 511 } 512 513 void GCNPassConfig::addMachineSSAOptimization() { 514 TargetPassConfig::addMachineSSAOptimization(); 515 516 // We want to fold operands after PeepholeOptimizer has run (or as part of 517 // it), because it will eliminate extra copies making it easier to fold the 518 // real source operand. We want to eliminate dead instructions after, so that 519 // we see fewer uses of the copies. We then need to clean up the dead 520 // instructions leftover after the operands are folded as well. 521 // 522 // XXX - Can we get away without running DeadMachineInstructionElim again? 523 addPass(&SIFoldOperandsID); 524 addPass(&DeadMachineInstructionElimID); 525 addPass(&SILoadStoreOptimizerID); 526 } 527 528 void GCNPassConfig::addIRPasses() { 529 // TODO: May want to move later or split into an early and late one. 530 addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine())); 531 532 AMDGPUPassConfig::addIRPasses(); 533 } 534 535 bool GCNPassConfig::addInstSelector() { 536 AMDGPUPassConfig::addInstSelector(); 537 addPass(createSILowerI1CopiesPass()); 538 addPass(&SIFixSGPRCopiesID); 539 return false; 540 } 541 542 #ifdef LLVM_BUILD_GLOBAL_ISEL 543 bool GCNPassConfig::addIRTranslator() { 544 addPass(new IRTranslator()); 545 return false; 546 } 547 548 bool GCNPassConfig::addLegalizeMachineIR() { 549 return false; 550 } 551 552 bool GCNPassConfig::addRegBankSelect() { 553 return false; 554 } 555 556 bool GCNPassConfig::addGlobalInstructionSelect() { 557 return false; 558 } 559 #endif 560 561 void GCNPassConfig::addPreRegAlloc() { 562 addPass(createSIShrinkInstructionsPass()); 563 addPass(createSIWholeQuadModePass()); 564 } 565 566 void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 567 // FIXME: We have to disable the verifier here because of PHIElimination + 568 // TwoAddressInstructions disabling it. 569 570 // This must be run immediately after phi elimination and before 571 // TwoAddressInstructions, otherwise the processing of the tied operand of 572 // SI_ELSE will introduce a copy of the tied operand source after the else. 573 insertPass(&PHIEliminationID, &SILowerControlFlowID, false); 574 575 TargetPassConfig::addFastRegAlloc(RegAllocPass); 576 } 577 578 void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 579 // This needs to be run directly before register allocation because earlier 580 // passes might recompute live intervals. 581 insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); 582 583 // This must be run immediately after phi elimination and before 584 // TwoAddressInstructions, otherwise the processing of the tied operand of 585 // SI_ELSE will introduce a copy of the tied operand source after the else. 586 insertPass(&PHIEliminationID, &SILowerControlFlowID, false); 587 588 TargetPassConfig::addOptimizedRegAlloc(RegAllocPass); 589 } 590 591 void GCNPassConfig::addPostRegAlloc() { 592 addPass(&SIOptimizeExecMaskingID); 593 TargetPassConfig::addPostRegAlloc(); 594 } 595 596 void GCNPassConfig::addPreSched2() { 597 } 598 599 void GCNPassConfig::addPreEmitPass() { 600 // The hazard recognizer that runs as part of the post-ra scheduler does not 601 // guarantee to be able handle all hazards correctly. This is because if there 602 // are multiple scheduling regions in a basic block, the regions are scheduled 603 // bottom up, so when we begin to schedule a region we don't know what 604 // instructions were emitted directly before it. 605 // 606 // Here we add a stand-alone hazard recognizer pass which can handle all 607 // cases. 608 addPass(&PostRAHazardRecognizerID); 609 610 addPass(createSIInsertWaitsPass()); 611 addPass(createSIShrinkInstructionsPass()); 612 addPass(&SIInsertSkipsPassID); 613 addPass(createSIDebuggerInsertNopsPass()); 614 addPass(&BranchRelaxationPassID); 615 } 616 617 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { 618 return new GCNPassConfig(this, PM); 619 } 620