//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief The AMDGPU target machine contains all of the hardware specific
/// information needed to emit code for R600 and SI GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSchedStrategy.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineScheduler.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineScheduler.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Vectorize.h"

using namespace llvm;

static cl::opt<bool> EnableR600StructurizeCFG(
  "r600-ir-structurize",
  cl::desc("Use StructurizeCFG IR pass"),
  cl::init(true));

static cl::opt<bool> EnableSROA(
  "amdgpu-sroa",
  cl::desc("Run SROA after promote alloca pass"),
  cl::ReallyHidden,
  cl::init(true));

static cl::opt<bool> EnableR600IfConvert(
  "r600-if-convert",
  cl::desc("Use if conversion pass"),
  cl::ReallyHidden,
  cl::init(true));
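// Note: each of these cl::opt flags is an ordinary llc command-line option;
// e.g. the load-store vectorizer declared below can be switched off for a
// test with "llc -march=amdgcn -amdgpu-load-store-vectorizer=0".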
// Option to disable vectorizer for tests.
static cl::opt<bool> EnableLoadStoreVectorizer(
  "amdgpu-load-store-vectorizer",
  cl::desc("Enable load store vectorizer"),
  cl::init(true),
  cl::Hidden);

extern "C" void LLVMInitializeAMDGPUTarget() {
  // Register the target
  RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
  RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());

  PassRegistry *PR = PassRegistry::getPassRegistry();
  initializeSILowerI1CopiesPass(*PR);
  initializeSIFixSGPRCopiesPass(*PR);
  initializeSIFoldOperandsPass(*PR);
  initializeSIShrinkInstructionsPass(*PR);
  initializeSIFixControlFlowLiveIntervalsPass(*PR);
  initializeSILoadStoreOptimizerPass(*PR);
  initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
  initializeAMDGPUAnnotateUniformValuesPass(*PR);
  initializeAMDGPUPromoteAllocaPass(*PR);
  initializeAMDGPUCodeGenPreparePass(*PR);
  initializeSIAnnotateControlFlowPass(*PR);
  initializeSIInsertWaitsPass(*PR);
  initializeSIWholeQuadModePass(*PR);
  initializeSILowerControlFlowPass(*PR);
  initializeSIInsertSkipsPass(*PR);
  initializeSIDebuggerInsertNopsPass(*PR);
  initializeSIOptimizeExecMaskingPass(*PR);
}

static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  return make_unique<AMDGPUTargetObjectFile>();
}

static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
}

static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
  return new SIScheduleDAGMI(C);
}

static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
    new ScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}

static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
                  createR600MachineScheduler);

static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
                createSIMachineScheduler);

static MachineSchedRegistry
GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
                             "Run GCN scheduler to maximize occupancy",
                             createGCNMaxOccupancyMachineScheduler);

static StringRef computeDataLayout(const Triple &TT) {
  if (TT.getArch() == Triple::r600) {
    // 32-bit pointers.
    return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
           "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
  }

  // 32-bit private, local, and region pointers. 64-bit global, constant and
  // flat.
  return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
         "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
         "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
}

LLVM_READNONE
static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
  if (!GPU.empty())
    return GPU;

  // HSA only supports CI+, so change the default GPU to a CI for HSA.
  if (TT.getArch() == Triple::amdgcn)
    return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti";

  return "r600";
}

static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
  // The AMDGPU toolchain only supports generating shared objects, so we
  // must always use PIC.
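  // Note that the requested model RM is deliberately ignored here: even an
  // explicit -relocation-model=static on the command line is overridden.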
  return Reloc::PIC_;
}

AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                         StringRef CPU, StringRef FS,
                                         TargetOptions Options,
                                         Optional<Reloc::Model> RM,
                                         CodeModel::Model CM,
                                         CodeGenOpt::Level OptLevel)
  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
                      FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
    TLOF(createTLOF(getTargetTriple())),
    IntrinsicInfo() {
  initAsmInfo();
}

AMDGPUTargetMachine::~AMDGPUTargetMachine() { }

StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
  Attribute GPUAttr = F.getFnAttribute("target-cpu");
  return GPUAttr.hasAttribute(Attribute::None) ?
    getTargetCPU() : GPUAttr.getValueAsString();
}

StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
  Attribute FSAttr = F.getFnAttribute("target-features");

  return FSAttr.hasAttribute(Attribute::None) ?
    getTargetFeatureString() :
    FSAttr.getValueAsString();
}

//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
//===----------------------------------------------------------------------===//

R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                     StringRef CPU, StringRef FS,
                                     TargetOptions Options,
                                     Optional<Reloc::Model> RM,
                                     CodeModel::Model CM, CodeGenOpt::Level OL)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
  setRequiresStructuredCFG(true);
}

const R600Subtarget *R600TargetMachine::getSubtargetImpl(
  const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

  SmallString<128> SubtargetKey(GPU);
  SubtargetKey.append(FS);

  auto &I = SubtargetMap[SubtargetKey];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
  }

  return I.get();
}

//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//

#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
struct SIGISelActualAccessor : public GISelAccessor {
  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
  const AMDGPUCallLowering *getCallLowering() const override {
    return CallLoweringInfo.get();
  }
};
} // End anonymous namespace.
#endif

GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                   StringRef CPU, StringRef FS,
                                   TargetOptions Options,
                                   Optional<Reloc::Model> RM,
                                   CodeModel::Model CM, CodeGenOpt::Level OL)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}

const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

  SmallString<128> SubtargetKey(GPU);
  SubtargetKey.append(FS);

  auto &I = SubtargetMap[SubtargetKey];
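  // Subtargets are cached per (GPU, feature string) key, so functions compiled
  // with distinct "target-cpu"/"target-features" attributes each get a
  // matching SISubtarget, while the rest share a single instance.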
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);

#ifndef LLVM_BUILD_GLOBAL_ISEL
    GISelAccessor *GISel = new GISelAccessor();
#else
    SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
    GISel->CallLoweringInfo.reset(
      new AMDGPUCallLowering(*I->getTargetLowering()));
#endif

    I->setGISelAccessor(*GISel);
  }

  return I.get();
}

//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup
//===----------------------------------------------------------------------===//

namespace {

class AMDGPUPassConfig : public TargetPassConfig {
public:
  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {

    // Exceptions and StackMaps are not supported, so these passes will never
    // do anything.
    disablePass(&StackMapLivenessID);
    disablePass(&FuncletLayoutID);
  }

  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    return DAG;
  }

  void addEarlyCSEOrGVNPass();
  void addStraightLineScalarOptimizationPasses();
  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addGCPasses() override;
};

class R600PassConfig final : public AMDGPUPassConfig {
public:
  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  ScheduleDAGInstrs *createMachineScheduler(
    MachineSchedContext *C) const override {
    return createR600MachineScheduler(C);
  }

  bool addPreISel() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

class GCNPassConfig final : public AMDGPUPassConfig {
public:
  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  GCNTargetMachine &getGCNTargetMachine() const {
    return getTM<GCNTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override;

  void addIRPasses() override;
  bool addPreISel() override;
  void addMachineSSAOptimization() override;
  bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
#endif
  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

} // End of anonymous namespace

TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
  return TargetIRAnalysis([this](const Function &F) {
    return TargetTransformInfo(AMDGPUTTIImpl(this, F));
  });
}
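// GVN is preferred at -O3 (CodeGenOpt::Aggressive) because it catches
// redundancies that EarlyCSE misses; see the worked example in addIRPasses
// below.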
void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
  if (getOptLevel() == CodeGenOpt::Aggressive)
    addPass(createGVNPass());
  else
    addPass(createEarlyCSEPass());
}

void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
  // EarlyCSE can reuse.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(createEarlyCSEPass());
}

void AMDGPUPassConfig::addIRPasses() {
  // There is no reason to run these.
  disablePass(&StackMapLivenessID);
  disablePass(&FuncletLayoutID);
  disablePass(&PatchableFunctionID);

  // Function calls are not supported, so make sure we inline everything.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerLegacyPass());
  // We need to add the barrier noop pass, otherwise adding the function
  // inlining pass will cause all of the PassConfigs passes to be run
  // one function at a time, which means if we have a module with two
  // functions, then we will generate code for the first function
  // without ever running any passes on the second.
  addPass(createBarrierNoopPass());

  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
  addPass(createAMDGPUOpenCLImageTypeLoweringPass());

  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
  if (TM.getOptLevel() > CodeGenOpt::None) {
    addPass(createAMDGPUPromoteAlloca(&TM));

    if (EnableSROA)
      addPass(createSROAPass());

    addStraightLineScalarOptimizationPasses();
  }

  TargetPassConfig::addIRPasses();

  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  //   %0 = add %a, %b
  //   %1 = add %b, %a
  //
  // and
  //
  //   %0 = shl nsw %a, 2
  //   %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  if (getOptLevel() != CodeGenOpt::None)
    addEarlyCSEOrGVNPass();
}

void AMDGPUPassConfig::addCodeGenPrepare() {
  TargetPassConfig::addCodeGenPrepare();

  if (EnableLoadStoreVectorizer)
    addPass(createLoadStoreVectorizerPass());
}

bool AMDGPUPassConfig::addPreISel() {
  addPass(createFlattenCFGPass());
  return false;
}

bool AMDGPUPassConfig::addInstSelector() {
  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
  return false;
}
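// Overriding addGCPasses to return false, without adding the base class's GC
// machine-code analysis, keeps the generic garbage-collection pipeline from
// running at all.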
bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}

//===----------------------------------------------------------------------===//
// R600 Pass Setup
//===----------------------------------------------------------------------===//

bool R600PassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  if (EnableR600StructurizeCFG)
    addPass(createStructurizeCFGPass());
  return false;
}

void R600PassConfig::addPreRegAlloc() {
  addPass(createR600VectorRegMerger(*TM));
}

void R600PassConfig::addPreSched2() {
  addPass(createR600EmitClauseMarkers(), false);
  if (EnableR600IfConvert)
    addPass(&IfConverterID, false);
  addPass(createR600ClauseMergePass(*TM), false);
}

void R600PassConfig::addPreEmitPass() {
  addPass(createAMDGPUCFGStructurizerPass(), false);
  addPass(createR600ExpandSpecialInstrsPass(*TM), false);
  addPass(&FinalizeMachineBundlesID, false);
  addPass(createR600Packetizer(*TM), false);
  addPass(createR600ControlFlowFinalizer(*TM), false);
}

TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new R600PassConfig(this, PM);
}

//===----------------------------------------------------------------------===//
// GCN Pass Setup
//===----------------------------------------------------------------------===//

ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
  MachineSchedContext *C) const {
  const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
  if (ST.enableSIScheduler())
    return createSIMachineScheduler(C);
  return createGCNMaxOccupancyMachineScheduler(C);
}

bool GCNPassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  // FIXME: We need to run a pass to propagate the attributes when calls are
  // supported.
  addPass(&AMDGPUAnnotateKernelFeaturesID);
  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
  addPass(createSinkingPass());
  addPass(createSITypeRewriter());
  addPass(createAMDGPUAnnotateUniformValues());
  addPass(createSIAnnotateControlFlowPass());

  return false;
}

void GCNPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();

  // We want to fold operands after PeepholeOptimizer has run (or as part of
  // it), because it will eliminate extra copies making it easier to fold the
  // real source operand. We want to eliminate dead instructions after, so that
  // we see fewer uses of the copies. We then need to clean up the dead
  // instructions leftover after the operands are folded as well.
  //
  // XXX - Can we get away without running DeadMachineInstructionElim again?
  addPass(&SIFoldOperandsID);
  addPass(&DeadMachineInstructionElimID);
  addPass(&SILoadStoreOptimizerID);
}

void GCNPassConfig::addIRPasses() {
  // TODO: May want to move later or split into an early and late one.
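  // For now, AMDGPUCodeGenPrepare runs ahead of the common AMDGPU IR passes
  // added by AMDGPUPassConfig::addIRPasses below.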
  addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine()));

  AMDGPUPassConfig::addIRPasses();
}

bool GCNPassConfig::addInstSelector() {
  AMDGPUPassConfig::addInstSelector();
  addPass(createSILowerI1CopiesPass());
  addPass(&SIFixSGPRCopiesID);
  return false;
}

#ifdef LLVM_BUILD_GLOBAL_ISEL
bool GCNPassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}

bool GCNPassConfig::addLegalizeMachineIR() {
  return false;
}

bool GCNPassConfig::addRegBankSelect() {
  return false;
}

bool GCNPassConfig::addGlobalInstructionSelect() {
  return false;
}
#endif

void GCNPassConfig::addPreRegAlloc() {
  addPass(createSIShrinkInstructionsPass());
  addPass(createSIWholeQuadModePass());
}

void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  // FIXME: We have to disable the verifier here because of PHIElimination +
  // TwoAddressInstructions disabling it.

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  TargetPassConfig::addFastRegAlloc(RegAllocPass);
}

void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  // This needs to be run directly before register allocation because earlier
  // passes might recompute live intervals.
  insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}

void GCNPassConfig::addPostRegAlloc() {
  addPass(&SIOptimizeExecMaskingID);
  TargetPassConfig::addPostRegAlloc();
}

void GCNPassConfig::addPreSched2() {
}

void GCNPassConfig::addPreEmitPass() {
  // The hazard recognizer that runs as part of the post-ra scheduler is not
  // guaranteed to be able to handle all hazards correctly. This is because if
  // there are multiple scheduling regions in a basic block, the regions are
  // scheduled bottom up, so when we begin to schedule a region we don't know
  // what instructions were emitted directly before it.
  //
  // Here we add a stand-alone hazard recognizer pass which can handle all
  // cases.
  addPass(&PostRAHazardRecognizerID);

  addPass(createSIInsertWaitsPass());
  addPass(createSIShrinkInstructionsPass());
  addPass(&SIInsertSkipsPassID);
  addPass(createSIDebuggerInsertNopsPass());
  addPass(&BranchRelaxationPassID);
}

TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new GCNPassConfig(this, PM);
}
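// The pass pipelines configured above can be inspected from the command line;
// e.g. "llc -march=amdgcn -debug-pass=Structure input.ll" (with any input
// file) prints the full sequence of IR and machine passes that will run.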