//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief The AMDGPU target machine contains all of the hardware specific
/// information needed to emit code for R600 and SI GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSchedStrategy.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineScheduler.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineScheduler.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Vectorize.h"
#include "llvm/IR/LegacyPassManager.h"

using namespace llvm;

static cl::opt<bool> EnableR600StructurizeCFG(
  "r600-ir-structurize",
  cl::desc("Use StructurizeCFG IR pass"),
  cl::init(true));

static cl::opt<bool> EnableSROA(
  "amdgpu-sroa",
  cl::desc("Run SROA after promote alloca pass"),
  cl::ReallyHidden,
  cl::init(true));

static cl::opt<bool> EnableR600IfConvert(
  "r600-if-convert",
  cl::desc("Use if conversion pass"),
  cl::ReallyHidden,
  cl::init(true));

// Option to disable vectorizer for tests.
static cl::opt<bool> EnableLoadStoreVectorizer(
  "amdgpu-load-store-vectorizer",
  cl::desc("Enable load store vectorizer"),
  cl::init(true),
  cl::Hidden);

// Option to control global loads scalarization.
static cl::opt<bool> ScalarizeGlobal(
  "amdgpu-scalarize-global-loads",
  cl::desc("Enable global load scalarization"),
  cl::init(false),
  cl::Hidden);

extern "C" void LLVMInitializeAMDGPUTarget() {
  // Register the target.
  RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
  RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());

  PassRegistry *PR = PassRegistry::getPassRegistry();
  initializeSILowerI1CopiesPass(*PR);
  initializeSIFixSGPRCopiesPass(*PR);
  initializeSIFoldOperandsPass(*PR);
  initializeSIShrinkInstructionsPass(*PR);
  initializeSIFixControlFlowLiveIntervalsPass(*PR);
  initializeSILoadStoreOptimizerPass(*PR);
  initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
  initializeAMDGPUAnnotateUniformValuesPass(*PR);
  initializeAMDGPUPromoteAllocaPass(*PR);
  initializeAMDGPUCodeGenPreparePass(*PR);
  initializeAMDGPUUnifyMetadataPass(*PR);
  initializeSIAnnotateControlFlowPass(*PR);
  initializeSIInsertWaitsPass(*PR);
  initializeSIWholeQuadModePass(*PR);
  initializeSILowerControlFlowPass(*PR);
  initializeSIInsertSkipsPass(*PR);
  initializeSIDebuggerInsertNopsPass(*PR);
  initializeSIOptimizeExecMaskingPass(*PR);
}

static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  return make_unique<AMDGPUTargetObjectFile>();
}

static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
}

static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
  return new SIScheduleDAGMI(C);
}

static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
    new ScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}

static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
                  createR600MachineScheduler);

static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
                createSIMachineScheduler);

static MachineSchedRegistry
GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
                             "Run GCN scheduler to maximize occupancy",
                             createGCNMaxOccupancyMachineScheduler);

static StringRef computeDataLayout(const Triple &TT) {
  if (TT.getArch() == Triple::r600) {
    // 32-bit pointers.
    return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
           "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
  }

  // 32-bit private, local, and region pointers. 64-bit global, constant and
  // flat.
  return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
         "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
         "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
}

LLVM_READNONE
static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
  if (!GPU.empty())
    return GPU;

  // HSA only supports CI+, so change the default GPU to a CI for HSA.
  if (TT.getArch() == Triple::amdgcn)
    return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti";
"kaveri" : "tahiti"; 155 156 return "r600"; 157 } 158 159 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { 160 // The AMDGPU toolchain only supports generating shared objects, so we 161 // must always use PIC. 162 return Reloc::PIC_; 163 } 164 165 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, 166 StringRef CPU, StringRef FS, 167 TargetOptions Options, 168 Optional<Reloc::Model> RM, 169 CodeModel::Model CM, 170 CodeGenOpt::Level OptLevel) 171 : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), 172 FS, Options, getEffectiveRelocModel(RM), CM, OptLevel), 173 TLOF(createTLOF(getTargetTriple())), 174 IntrinsicInfo() { 175 initAsmInfo(); 176 } 177 178 AMDGPUTargetMachine::~AMDGPUTargetMachine() { } 179 180 StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const { 181 Attribute GPUAttr = F.getFnAttribute("target-cpu"); 182 return GPUAttr.hasAttribute(Attribute::None) ? 183 getTargetCPU() : GPUAttr.getValueAsString(); 184 } 185 186 StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const { 187 Attribute FSAttr = F.getFnAttribute("target-features"); 188 189 return FSAttr.hasAttribute(Attribute::None) ? 190 getTargetFeatureString() : 191 FSAttr.getValueAsString(); 192 } 193 194 void AMDGPUTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) { 195 PM.add(llvm::createAMDGPUUnifyMetadataPass()); 196 } 197 198 //===----------------------------------------------------------------------===// 199 // R600 Target Machine (R600 -> Cayman) 200 //===----------------------------------------------------------------------===// 201 202 R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT, 203 StringRef CPU, StringRef FS, 204 TargetOptions Options, 205 Optional<Reloc::Model> RM, 206 CodeModel::Model CM, CodeGenOpt::Level OL) 207 : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) { 208 setRequiresStructuredCFG(true); 209 } 210 211 const R600Subtarget *R600TargetMachine::getSubtargetImpl( 212 const Function &F) const { 213 StringRef GPU = getGPUName(F); 214 StringRef FS = getFeatureString(F); 215 216 SmallString<128> SubtargetKey(GPU); 217 SubtargetKey.append(FS); 218 219 auto &I = SubtargetMap[SubtargetKey]; 220 if (!I) { 221 // This needs to be done before we create a new subtarget since any 222 // creation will depend on the TM and the code generation flags on the 223 // function that reside in TargetOptions. 224 resetTargetOptions(F); 225 I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this); 226 } 227 228 return I.get(); 229 } 230 231 //===----------------------------------------------------------------------===// 232 // GCN Target Machine (SI+) 233 //===----------------------------------------------------------------------===// 234 235 #ifdef LLVM_BUILD_GLOBAL_ISEL 236 namespace { 237 struct SIGISelActualAccessor : public GISelAccessor { 238 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; 239 const AMDGPUCallLowering *getCallLowering() const override { 240 return CallLoweringInfo.get(); 241 } 242 }; 243 } // End anonymous namespace. 
#endif

GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                   StringRef CPU, StringRef FS,
                                   TargetOptions Options,
                                   Optional<Reloc::Model> RM,
                                   CodeModel::Model CM, CodeGenOpt::Level OL)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}

const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

  SmallString<128> SubtargetKey(GPU);
  SubtargetKey.append(FS);

  auto &I = SubtargetMap[SubtargetKey];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);

#ifndef LLVM_BUILD_GLOBAL_ISEL
    GISelAccessor *GISel = new GISelAccessor();
#else
    SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
    GISel->CallLoweringInfo.reset(
      new AMDGPUCallLowering(*I->getTargetLowering()));
#endif

    I->setGISelAccessor(*GISel);
  }

  I->setScalarizeGlobalBehavior(ScalarizeGlobal);

  return I.get();
}

//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup
//===----------------------------------------------------------------------===//

namespace {

class AMDGPUPassConfig : public TargetPassConfig {
public:
  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {

    // Exceptions and StackMaps are not supported, so these passes will never
    // do anything.
    disablePass(&StackMapLivenessID);
    disablePass(&FuncletLayoutID);
  }

  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    return DAG;
  }

  void addEarlyCSEOrGVNPass();
  void addStraightLineScalarOptimizationPasses();
  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addGCPasses() override;
};

class R600PassConfig final : public AMDGPUPassConfig {
public:
  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  ScheduleDAGInstrs *createMachineScheduler(
    MachineSchedContext *C) const override {
    return createR600MachineScheduler(C);
  }

  bool addPreISel() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

class GCNPassConfig final : public AMDGPUPassConfig {
public:
  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  GCNTargetMachine &getGCNTargetMachine() const {
    return getTM<GCNTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override;

  void addIRPasses() override;
  bool addPreISel() override;
  void addMachineSSAOptimization() override;
  bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
#endif
  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

} // End of anonymous namespace

TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
  return TargetIRAnalysis([this](const Function &F) {
    return TargetTransformInfo(AMDGPUTTIImpl(this, F));
  });
}

void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
  if (getOptLevel() == CodeGenOpt::Aggressive)
    addPass(createGVNPass());
  else
    addPass(createEarlyCSEPass());
}

void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
  // EarlyCSE can reuse.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(createEarlyCSEPass());
}

void AMDGPUPassConfig::addIRPasses() {
  // There is no reason to run these.
  disablePass(&StackMapLivenessID);
  disablePass(&FuncletLayoutID);
  disablePass(&PatchableFunctionID);

  // Function calls are not supported, so make sure we inline everything.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerLegacyPass());
  // We need to add the barrier noop pass, otherwise adding the function
  // inlining pass will cause all of the PassConfig's passes to be run
  // one function at a time, which means if we have a module with two
  // functions, then we will generate code for the first function
  // without ever running any passes on the second.
  addPass(createBarrierNoopPass());

  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
  addPass(createAMDGPUOpenCLImageTypeLoweringPass());

  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
  if (TM.getOptLevel() > CodeGenOpt::None) {
    addPass(createAMDGPUPromoteAlloca(&TM));

    if (EnableSROA)
      addPass(createSROAPass());

    addStraightLineScalarOptimizationPasses();
  }

  TargetPassConfig::addIRPasses();

  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  //   %0 = add %a, %b
  //   %1 = add %b, %a
  //
  // and
  //
  //   %0 = shl nsw %a, 2
  //   %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  if (getOptLevel() != CodeGenOpt::None)
    addEarlyCSEOrGVNPass();
}

void AMDGPUPassConfig::addCodeGenPrepare() {
  TargetPassConfig::addCodeGenPrepare();

  if (EnableLoadStoreVectorizer)
    addPass(createLoadStoreVectorizerPass());
}

bool AMDGPUPassConfig::addPreISel() {
  addPass(createFlattenCFGPass());
  return false;
}

bool AMDGPUPassConfig::addInstSelector() {
  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
  return false;
}

bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}

//===----------------------------------------------------------------------===//
// R600 Pass Setup
//===----------------------------------------------------------------------===//

bool R600PassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  if (EnableR600StructurizeCFG)
    addPass(createStructurizeCFGPass());
  return false;
}

void R600PassConfig::addPreRegAlloc() {
  addPass(createR600VectorRegMerger(*TM));
}

void R600PassConfig::addPreSched2() {
  addPass(createR600EmitClauseMarkers(), false);
  if (EnableR600IfConvert)
    addPass(&IfConverterID, false);
  addPass(createR600ClauseMergePass(*TM), false);
}

void R600PassConfig::addPreEmitPass() {
  addPass(createAMDGPUCFGStructurizerPass(), false);
  addPass(createR600ExpandSpecialInstrsPass(*TM), false);
  addPass(&FinalizeMachineBundlesID, false);
  addPass(createR600Packetizer(*TM), false);
  addPass(createR600ControlFlowFinalizer(*TM), false);
}

TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new R600PassConfig(this, PM);
}

//===----------------------------------------------------------------------===//
// GCN Pass Setup
//===----------------------------------------------------------------------===//

ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
  MachineSchedContext *C) const {
  const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
  if (ST.enableSIScheduler())
    return createSIMachineScheduler(C);
  return createGCNMaxOccupancyMachineScheduler(C);
}

bool GCNPassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  // FIXME: We need to run a pass to propagate the attributes when calls are
  // supported.
  addPass(&AMDGPUAnnotateKernelFeaturesID);
  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
  addPass(createSinkingPass());
  addPass(createSITypeRewriter());
  addPass(createAMDGPUAnnotateUniformValues());
  addPass(createSIAnnotateControlFlowPass());

  return false;
}

void GCNPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();

  // We want to fold operands after PeepholeOptimizer has run (or as part of
  // it), because it will eliminate extra copies, making it easier to fold the
  // real source operand. We want to eliminate dead instructions after, so that
  // we see fewer uses of the copies. We then need to clean up the dead
  // instructions left over after the operands are folded as well.
  //
  // XXX - Can we get away without running DeadMachineInstructionElim again?
  addPass(&SIFoldOperandsID);
  addPass(&DeadMachineInstructionElimID);
  addPass(&SILoadStoreOptimizerID);
}

void GCNPassConfig::addIRPasses() {
  // TODO: May want to move later or split into an early and late one.
  addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine()));

  AMDGPUPassConfig::addIRPasses();
}

bool GCNPassConfig::addInstSelector() {
  AMDGPUPassConfig::addInstSelector();
  addPass(createSILowerI1CopiesPass());
  addPass(&SIFixSGPRCopiesID);
  return false;
}

#ifdef LLVM_BUILD_GLOBAL_ISEL
bool GCNPassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}

bool GCNPassConfig::addLegalizeMachineIR() {
  return false;
}

bool GCNPassConfig::addRegBankSelect() {
  return false;
}

bool GCNPassConfig::addGlobalInstructionSelect() {
  return false;
}
#endif

void GCNPassConfig::addPreRegAlloc() {
  addPass(createSIShrinkInstructionsPass());
  addPass(createSIWholeQuadModePass());
}

void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  // FIXME: We have to disable the verifier here because of PHIElimination +
  // TwoAddressInstructions disabling it.

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  TargetPassConfig::addFastRegAlloc(RegAllocPass);
}

void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  // This needs to be run directly before register allocation because earlier
  // passes might recompute live intervals.
  insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}

void GCNPassConfig::addPostRegAlloc() {
  addPass(&SIOptimizeExecMaskingID);
  TargetPassConfig::addPostRegAlloc();
}

void GCNPassConfig::addPreSched2() {
}

void GCNPassConfig::addPreEmitPass() {
  // The hazard recognizer that runs as part of the post-ra scheduler does not
  // guarantee to be able to handle all hazards correctly. This is because if
  // there are multiple scheduling regions in a basic block, the regions are
  // scheduled bottom up, so when we begin to schedule a region we don't know
  // what instructions were emitted directly before it.
  //
  // Here we add a stand-alone hazard recognizer pass which can handle all
  // cases.
  addPass(&PostRAHazardRecognizerID);

  addPass(createSIInsertWaitsPass());
  addPass(createSIShrinkInstructionsPass());
  addPass(&SIInsertSkipsPassID);
  addPass(createSIDebuggerInsertNopsPass());
  addPass(&BranchRelaxationPassID);
}

TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new GCNPassConfig(this, PM);
}