1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief The AMDGPU target machine contains all of the hardware specific 12 /// information needed to emit code for R600 and SI GPUs. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPUTargetMachine.h" 17 #include "AMDGPU.h" 18 #include "AMDGPUCallLowering.h" 19 #include "AMDGPUTargetObjectFile.h" 20 #include "AMDGPUTargetTransformInfo.h" 21 #include "GCNSchedStrategy.h" 22 #include "R600MachineScheduler.h" 23 #include "SIMachineScheduler.h" 24 #include "llvm/ADT/SmallString.h" 25 #include "llvm/ADT/STLExtras.h" 26 #include "llvm/ADT/StringRef.h" 27 #include "llvm/ADT/Triple.h" 28 #include "llvm/CodeGen/GlobalISel/GISelAccessor.h" 29 #include "llvm/CodeGen/GlobalISel/IRTranslator.h" 30 #include "llvm/CodeGen/MachineScheduler.h" 31 #include "llvm/CodeGen/Passes.h" 32 #include "llvm/CodeGen/TargetPassConfig.h" 33 #include "llvm/Support/TargetRegistry.h" 34 #include "llvm/Transforms/IPO.h" 35 #include "llvm/Transforms/IPO/AlwaysInliner.h" 36 #include "llvm/Transforms/Scalar.h" 37 #include "llvm/Transforms/Scalar/GVN.h" 38 #include "llvm/Transforms/Vectorize.h" 39 #include "llvm/IR/Attributes.h" 40 #include "llvm/IR/Function.h" 41 #include "llvm/IR/LegacyPassManager.h" 42 #include "llvm/Pass.h" 43 #include "llvm/Support/CommandLine.h" 44 #include "llvm/Support/Compiler.h" 45 #include "llvm/Target/TargetLoweringObjectFile.h" 46 #include <memory> 47 48 using namespace llvm; 49 50 static cl::opt<bool> EnableR600StructurizeCFG( 51 "r600-ir-structurize", 52 cl::desc("Use StructurizeCFG IR pass"), 53 cl::init(true)); 54 55 static cl::opt<bool> EnableSROA( 56 "amdgpu-sroa", 57 cl::desc("Run SROA after promote alloca pass"), 58 cl::ReallyHidden, 59 cl::init(true)); 60 61 static cl::opt<bool> EnableR600IfConvert( 62 "r600-if-convert", 63 cl::desc("Use if conversion pass"), 64 cl::ReallyHidden, 65 cl::init(true)); 66 67 // Option to disable vectorizer for tests. 68 static cl::opt<bool> EnableLoadStoreVectorizer( 69 "amdgpu-load-store-vectorizer", 70 cl::desc("Enable load store vectorizer"), 71 cl::init(true), 72 cl::Hidden); 73 74 // Option to to control global loads scalarization 75 static cl::opt<bool> ScalarizeGlobal( 76 "amdgpu-scalarize-global-loads", 77 cl::desc("Enable global load scalarization"), 78 cl::init(false), 79 cl::Hidden); 80 81 extern "C" void LLVMInitializeAMDGPUTarget() { 82 // Register the target 83 RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget()); 84 RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget()); 85 86 PassRegistry *PR = PassRegistry::getPassRegistry(); 87 initializeSILowerI1CopiesPass(*PR); 88 initializeSIFixSGPRCopiesPass(*PR); 89 initializeSIFoldOperandsPass(*PR); 90 initializeSIShrinkInstructionsPass(*PR); 91 initializeSIFixControlFlowLiveIntervalsPass(*PR); 92 initializeSILoadStoreOptimizerPass(*PR); 93 initializeAMDGPUAnnotateKernelFeaturesPass(*PR); 94 initializeAMDGPUAnnotateUniformValuesPass(*PR); 95 initializeAMDGPUPromoteAllocaPass(*PR); 96 initializeAMDGPUCodeGenPreparePass(*PR); 97 initializeAMDGPUUnifyMetadataPass(*PR); 98 initializeSIAnnotateControlFlowPass(*PR); 99 initializeSIInsertWaitsPass(*PR); 100 initializeSIWholeQuadModePass(*PR); 101 initializeSILowerControlFlowPass(*PR); 102 initializeSIInsertSkipsPass(*PR); 103 initializeSIDebuggerInsertNopsPass(*PR); 104 initializeSIOptimizeExecMaskingPass(*PR); 105 } 106 107 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { 108 return llvm::make_unique<AMDGPUTargetObjectFile>(); 109 } 110 111 static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { 112 return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>()); 113 } 114 115 static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) { 116 return new SIScheduleDAGMI(C); 117 } 118 119 static ScheduleDAGInstrs * 120 createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) { 121 ScheduleDAGMILive *DAG = 122 new ScheduleDAGMILive(C, 123 llvm::make_unique<GCNMaxOccupancySchedStrategy>(C)); 124 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); 125 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); 126 return DAG; 127 } 128 129 static MachineSchedRegistry 130 R600SchedRegistry("r600", "Run R600's custom scheduler", 131 createR600MachineScheduler); 132 133 static MachineSchedRegistry 134 SISchedRegistry("si", "Run SI's custom scheduler", 135 createSIMachineScheduler); 136 137 static MachineSchedRegistry 138 GCNMaxOccupancySchedRegistry("gcn-max-occupancy", 139 "Run GCN scheduler to maximize occupancy", 140 createGCNMaxOccupancyMachineScheduler); 141 142 static StringRef computeDataLayout(const Triple &TT) { 143 if (TT.getArch() == Triple::r600) { 144 // 32-bit pointers. 145 return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 146 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; 147 } 148 149 // 32-bit private, local, and region pointers. 64-bit global, constant and 150 // flat. 151 return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32" 152 "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128" 153 "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"; 154 } 155 156 LLVM_READNONE 157 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) { 158 if (!GPU.empty()) 159 return GPU; 160 161 // HSA only supports CI+, so change the default GPU to a CI for HSA. 162 if (TT.getArch() == Triple::amdgcn) 163 return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti"; 164 165 return "r600"; 166 } 167 168 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) { 169 // The AMDGPU toolchain only supports generating shared objects, so we 170 // must always use PIC. 171 return Reloc::PIC_; 172 } 173 174 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, 175 StringRef CPU, StringRef FS, 176 TargetOptions Options, 177 Optional<Reloc::Model> RM, 178 CodeModel::Model CM, 179 CodeGenOpt::Level OptLevel) 180 : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU), 181 FS, Options, getEffectiveRelocModel(RM), CM, OptLevel), 182 TLOF(createTLOF(getTargetTriple())) { 183 initAsmInfo(); 184 } 185 186 AMDGPUTargetMachine::~AMDGPUTargetMachine() = default; 187 188 StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const { 189 Attribute GPUAttr = F.getFnAttribute("target-cpu"); 190 return GPUAttr.hasAttribute(Attribute::None) ? 191 getTargetCPU() : GPUAttr.getValueAsString(); 192 } 193 194 StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const { 195 Attribute FSAttr = F.getFnAttribute("target-features"); 196 197 return FSAttr.hasAttribute(Attribute::None) ? 198 getTargetFeatureString() : 199 FSAttr.getValueAsString(); 200 } 201 202 void AMDGPUTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) { 203 PM.add(createAMDGPUUnifyMetadataPass()); 204 } 205 206 //===----------------------------------------------------------------------===// 207 // R600 Target Machine (R600 -> Cayman) 208 //===----------------------------------------------------------------------===// 209 210 R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT, 211 StringRef CPU, StringRef FS, 212 TargetOptions Options, 213 Optional<Reloc::Model> RM, 214 CodeModel::Model CM, CodeGenOpt::Level OL) 215 : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) { 216 setRequiresStructuredCFG(true); 217 } 218 219 const R600Subtarget *R600TargetMachine::getSubtargetImpl( 220 const Function &F) const { 221 StringRef GPU = getGPUName(F); 222 StringRef FS = getFeatureString(F); 223 224 SmallString<128> SubtargetKey(GPU); 225 SubtargetKey.append(FS); 226 227 auto &I = SubtargetMap[SubtargetKey]; 228 if (!I) { 229 // This needs to be done before we create a new subtarget since any 230 // creation will depend on the TM and the code generation flags on the 231 // function that reside in TargetOptions. 232 resetTargetOptions(F); 233 I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this); 234 } 235 236 return I.get(); 237 } 238 239 //===----------------------------------------------------------------------===// 240 // GCN Target Machine (SI+) 241 //===----------------------------------------------------------------------===// 242 243 #ifdef LLVM_BUILD_GLOBAL_ISEL 244 namespace { 245 246 struct SIGISelActualAccessor : public GISelAccessor { 247 std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo; 248 const AMDGPUCallLowering *getCallLowering() const override { 249 return CallLoweringInfo.get(); 250 } 251 }; 252 253 } // end anonymous namespace 254 #endif 255 256 GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT, 257 StringRef CPU, StringRef FS, 258 TargetOptions Options, 259 Optional<Reloc::Model> RM, 260 CodeModel::Model CM, CodeGenOpt::Level OL) 261 : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {} 262 263 const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const { 264 StringRef GPU = getGPUName(F); 265 StringRef FS = getFeatureString(F); 266 267 SmallString<128> SubtargetKey(GPU); 268 SubtargetKey.append(FS); 269 270 auto &I = SubtargetMap[SubtargetKey]; 271 if (!I) { 272 // This needs to be done before we create a new subtarget since any 273 // creation will depend on the TM and the code generation flags on the 274 // function that reside in TargetOptions. 275 resetTargetOptions(F); 276 I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this); 277 278 #ifndef LLVM_BUILD_GLOBAL_ISEL 279 GISelAccessor *GISel = new GISelAccessor(); 280 #else 281 SIGISelActualAccessor *GISel = new SIGISelActualAccessor(); 282 GISel->CallLoweringInfo.reset( 283 new AMDGPUCallLowering(*I->getTargetLowering())); 284 #endif 285 286 I->setGISelAccessor(*GISel); 287 } 288 289 I->setScalarizeGlobalBehavior(ScalarizeGlobal); 290 291 return I.get(); 292 } 293 294 //===----------------------------------------------------------------------===// 295 // AMDGPU Pass Setup 296 //===----------------------------------------------------------------------===// 297 298 namespace { 299 300 class AMDGPUPassConfig : public TargetPassConfig { 301 public: 302 AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM) 303 : TargetPassConfig(TM, PM) { 304 // Exceptions and StackMaps are not supported, so these passes will never do 305 // anything. 306 disablePass(&StackMapLivenessID); 307 disablePass(&FuncletLayoutID); 308 } 309 310 AMDGPUTargetMachine &getAMDGPUTargetMachine() const { 311 return getTM<AMDGPUTargetMachine>(); 312 } 313 314 ScheduleDAGInstrs * 315 createMachineScheduler(MachineSchedContext *C) const override { 316 ScheduleDAGMILive *DAG = createGenericSchedLive(C); 317 DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI)); 318 DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI)); 319 return DAG; 320 } 321 322 void addEarlyCSEOrGVNPass(); 323 void addStraightLineScalarOptimizationPasses(); 324 void addIRPasses() override; 325 void addCodeGenPrepare() override; 326 bool addPreISel() override; 327 bool addInstSelector() override; 328 bool addGCPasses() override; 329 }; 330 331 class R600PassConfig final : public AMDGPUPassConfig { 332 public: 333 R600PassConfig(TargetMachine *TM, PassManagerBase &PM) 334 : AMDGPUPassConfig(TM, PM) {} 335 336 ScheduleDAGInstrs *createMachineScheduler( 337 MachineSchedContext *C) const override { 338 return createR600MachineScheduler(C); 339 } 340 341 bool addPreISel() override; 342 void addPreRegAlloc() override; 343 void addPreSched2() override; 344 void addPreEmitPass() override; 345 }; 346 347 class GCNPassConfig final : public AMDGPUPassConfig { 348 public: 349 GCNPassConfig(TargetMachine *TM, PassManagerBase &PM) 350 : AMDGPUPassConfig(TM, PM) {} 351 352 GCNTargetMachine &getGCNTargetMachine() const { 353 return getTM<GCNTargetMachine>(); 354 } 355 356 ScheduleDAGInstrs * 357 createMachineScheduler(MachineSchedContext *C) const override; 358 359 void addIRPasses() override; 360 bool addPreISel() override; 361 void addMachineSSAOptimization() override; 362 bool addInstSelector() override; 363 #ifdef LLVM_BUILD_GLOBAL_ISEL 364 bool addIRTranslator() override; 365 bool addLegalizeMachineIR() override; 366 bool addRegBankSelect() override; 367 bool addGlobalInstructionSelect() override; 368 #endif 369 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 370 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 371 void addPreRegAlloc() override; 372 void addPostRegAlloc() override; 373 void addPreSched2() override; 374 void addPreEmitPass() override; 375 }; 376 377 } // end anonymous namespace 378 379 TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() { 380 return TargetIRAnalysis([this](const Function &F) { 381 return TargetTransformInfo(AMDGPUTTIImpl(this, F)); 382 }); 383 } 384 385 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() { 386 if (getOptLevel() == CodeGenOpt::Aggressive) 387 addPass(createGVNPass()); 388 else 389 addPass(createEarlyCSEPass()); 390 } 391 392 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() { 393 addPass(createSeparateConstOffsetFromGEPPass()); 394 addPass(createSpeculativeExecutionPass()); 395 // ReassociateGEPs exposes more opportunites for SLSR. See 396 // the example in reassociate-geps-and-slsr.ll. 397 addPass(createStraightLineStrengthReducePass()); 398 // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or 399 // EarlyCSE can reuse. 400 addEarlyCSEOrGVNPass(); 401 // Run NaryReassociate after EarlyCSE/GVN to be more effective. 402 addPass(createNaryReassociatePass()); 403 // NaryReassociate on GEPs creates redundant common expressions, so run 404 // EarlyCSE after it. 405 addPass(createEarlyCSEPass()); 406 } 407 408 void AMDGPUPassConfig::addIRPasses() { 409 // There is no reason to run these. 410 disablePass(&StackMapLivenessID); 411 disablePass(&FuncletLayoutID); 412 disablePass(&PatchableFunctionID); 413 414 // Function calls are not supported, so make sure we inline everything. 415 addPass(createAMDGPUAlwaysInlinePass()); 416 addPass(createAlwaysInlinerLegacyPass()); 417 // We need to add the barrier noop pass, otherwise adding the function 418 // inlining pass will cause all of the PassConfigs passes to be run 419 // one function at a time, which means if we have a nodule with two 420 // functions, then we will generate code for the first function 421 // without ever running any passes on the second. 422 addPass(createBarrierNoopPass()); 423 424 // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. 425 addPass(createAMDGPUOpenCLImageTypeLoweringPass()); 426 427 const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine(); 428 if (TM.getOptLevel() > CodeGenOpt::None) { 429 addPass(createAMDGPUPromoteAlloca(&TM)); 430 431 if (EnableSROA) 432 addPass(createSROAPass()); 433 434 addStraightLineScalarOptimizationPasses(); 435 } 436 437 TargetPassConfig::addIRPasses(); 438 439 // EarlyCSE is not always strong enough to clean up what LSR produces. For 440 // example, GVN can combine 441 // 442 // %0 = add %a, %b 443 // %1 = add %b, %a 444 // 445 // and 446 // 447 // %0 = shl nsw %a, 2 448 // %1 = shl %a, 2 449 // 450 // but EarlyCSE can do neither of them. 451 if (getOptLevel() != CodeGenOpt::None) 452 addEarlyCSEOrGVNPass(); 453 } 454 455 void AMDGPUPassConfig::addCodeGenPrepare() { 456 TargetPassConfig::addCodeGenPrepare(); 457 458 if (EnableLoadStoreVectorizer) 459 addPass(createLoadStoreVectorizerPass()); 460 } 461 462 bool AMDGPUPassConfig::addPreISel() { 463 addPass(createFlattenCFGPass()); 464 return false; 465 } 466 467 bool AMDGPUPassConfig::addInstSelector() { 468 addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel())); 469 return false; 470 } 471 472 bool AMDGPUPassConfig::addGCPasses() { 473 // Do nothing. GC is not supported. 474 return false; 475 } 476 477 //===----------------------------------------------------------------------===// 478 // R600 Pass Setup 479 //===----------------------------------------------------------------------===// 480 481 bool R600PassConfig::addPreISel() { 482 AMDGPUPassConfig::addPreISel(); 483 484 if (EnableR600StructurizeCFG) 485 addPass(createStructurizeCFGPass()); 486 return false; 487 } 488 489 void R600PassConfig::addPreRegAlloc() { 490 addPass(createR600VectorRegMerger(*TM)); 491 } 492 493 void R600PassConfig::addPreSched2() { 494 addPass(createR600EmitClauseMarkers(), false); 495 if (EnableR600IfConvert) 496 addPass(&IfConverterID, false); 497 addPass(createR600ClauseMergePass(*TM), false); 498 } 499 500 void R600PassConfig::addPreEmitPass() { 501 addPass(createAMDGPUCFGStructurizerPass(), false); 502 addPass(createR600ExpandSpecialInstrsPass(*TM), false); 503 addPass(&FinalizeMachineBundlesID, false); 504 addPass(createR600Packetizer(*TM), false); 505 addPass(createR600ControlFlowFinalizer(*TM), false); 506 } 507 508 TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { 509 return new R600PassConfig(this, PM); 510 } 511 512 //===----------------------------------------------------------------------===// 513 // GCN Pass Setup 514 //===----------------------------------------------------------------------===// 515 516 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler( 517 MachineSchedContext *C) const { 518 const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>(); 519 if (ST.enableSIScheduler()) 520 return createSIMachineScheduler(C); 521 return createGCNMaxOccupancyMachineScheduler(C); 522 } 523 524 bool GCNPassConfig::addPreISel() { 525 AMDGPUPassConfig::addPreISel(); 526 527 // FIXME: We need to run a pass to propagate the attributes when calls are 528 // supported. 529 addPass(&AMDGPUAnnotateKernelFeaturesID); 530 addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions 531 addPass(createSinkingPass()); 532 addPass(createSITypeRewriter()); 533 addPass(createAMDGPUAnnotateUniformValues()); 534 addPass(createSIAnnotateControlFlowPass()); 535 536 return false; 537 } 538 539 void GCNPassConfig::addMachineSSAOptimization() { 540 TargetPassConfig::addMachineSSAOptimization(); 541 542 // We want to fold operands after PeepholeOptimizer has run (or as part of 543 // it), because it will eliminate extra copies making it easier to fold the 544 // real source operand. We want to eliminate dead instructions after, so that 545 // we see fewer uses of the copies. We then need to clean up the dead 546 // instructions leftover after the operands are folded as well. 547 // 548 // XXX - Can we get away without running DeadMachineInstructionElim again? 549 addPass(&SIFoldOperandsID); 550 addPass(&DeadMachineInstructionElimID); 551 addPass(&SILoadStoreOptimizerID); 552 } 553 554 void GCNPassConfig::addIRPasses() { 555 // TODO: May want to move later or split into an early and late one. 556 addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine())); 557 558 AMDGPUPassConfig::addIRPasses(); 559 } 560 561 bool GCNPassConfig::addInstSelector() { 562 AMDGPUPassConfig::addInstSelector(); 563 addPass(createSILowerI1CopiesPass()); 564 addPass(&SIFixSGPRCopiesID); 565 return false; 566 } 567 568 #ifdef LLVM_BUILD_GLOBAL_ISEL 569 bool GCNPassConfig::addIRTranslator() { 570 addPass(new IRTranslator()); 571 return false; 572 } 573 574 bool GCNPassConfig::addLegalizeMachineIR() { 575 return false; 576 } 577 578 bool GCNPassConfig::addRegBankSelect() { 579 return false; 580 } 581 582 bool GCNPassConfig::addGlobalInstructionSelect() { 583 return false; 584 } 585 #endif 586 587 void GCNPassConfig::addPreRegAlloc() { 588 addPass(createSIShrinkInstructionsPass()); 589 addPass(createSIWholeQuadModePass()); 590 } 591 592 void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 593 // FIXME: We have to disable the verifier here because of PHIElimination + 594 // TwoAddressInstructions disabling it. 595 596 // This must be run immediately after phi elimination and before 597 // TwoAddressInstructions, otherwise the processing of the tied operand of 598 // SI_ELSE will introduce a copy of the tied operand source after the else. 599 insertPass(&PHIEliminationID, &SILowerControlFlowID, false); 600 601 TargetPassConfig::addFastRegAlloc(RegAllocPass); 602 } 603 604 void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 605 // This needs to be run directly before register allocation because earlier 606 // passes might recompute live intervals. 607 insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); 608 609 // This must be run immediately after phi elimination and before 610 // TwoAddressInstructions, otherwise the processing of the tied operand of 611 // SI_ELSE will introduce a copy of the tied operand source after the else. 612 insertPass(&PHIEliminationID, &SILowerControlFlowID, false); 613 614 TargetPassConfig::addOptimizedRegAlloc(RegAllocPass); 615 } 616 617 void GCNPassConfig::addPostRegAlloc() { 618 addPass(&SIOptimizeExecMaskingID); 619 TargetPassConfig::addPostRegAlloc(); 620 } 621 622 void GCNPassConfig::addPreSched2() { 623 } 624 625 void GCNPassConfig::addPreEmitPass() { 626 // The hazard recognizer that runs as part of the post-ra scheduler does not 627 // guarantee to be able handle all hazards correctly. This is because if there 628 // are multiple scheduling regions in a basic block, the regions are scheduled 629 // bottom up, so when we begin to schedule a region we don't know what 630 // instructions were emitted directly before it. 631 // 632 // Here we add a stand-alone hazard recognizer pass which can handle all 633 // cases. 634 addPass(&PostRAHazardRecognizerID); 635 636 addPass(createSIInsertWaitsPass()); 637 addPass(createSIShrinkInstructionsPass()); 638 addPass(&SIInsertSkipsPassID); 639 addPass(createSIDebuggerInsertNopsPass()); 640 addPass(&BranchRelaxationPassID); 641 } 642 643 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { 644 return new GCNPassConfig(this, PM); 645 } 646