1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief The AMDGPU target machine contains all of the hardware specific 12 /// information needed to emit code for R600 and SI GPUs. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPUTargetMachine.h" 17 #include "AMDGPU.h" 18 #include "AMDGPUCallLowering.h" 19 #include "AMDGPUInstructionSelector.h" 20 #include "AMDGPULegalizerInfo.h" 21 #ifdef LLVM_BUILD_GLOBAL_ISEL 22 #include "AMDGPURegisterBankInfo.h" 23 #endif 24 #include "AMDGPUTargetObjectFile.h" 25 #include "AMDGPUTargetTransformInfo.h" 26 #include "GCNSchedStrategy.h" 27 #include "R600MachineScheduler.h" 28 #include "SIMachineScheduler.h" 29 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" 30 #include "llvm/CodeGen/GlobalISel/IRTranslator.h" 31 #include "llvm/CodeGen/GlobalISel/Legalizer.h" 32 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" 33 #include "llvm/CodeGen/Passes.h" 34 #include "llvm/CodeGen/TargetPassConfig.h" 35 #include "llvm/Support/TargetRegistry.h" 36 #include "llvm/Transforms/IPO.h" 37 #include "llvm/Transforms/IPO/AlwaysInliner.h" 38 #include "llvm/Transforms/IPO/PassManagerBuilder.h" 39 #include "llvm/Transforms/Scalar.h" 40 #include "llvm/Transforms/Scalar/GVN.h" 41 #include "llvm/Transforms/Vectorize.h" 42 #include "llvm/IR/Attributes.h" 43 #include "llvm/IR/Function.h" 44 #include "llvm/IR/LegacyPassManager.h" 45 #include "llvm/Pass.h" 46 #include "llvm/Support/CommandLine.h" 47 #include "llvm/Support/Compiler.h" 48 #include "llvm/Target/TargetLoweringObjectFile.h" 49 #include <memory> 50 51 using namespace llvm; 52 53 static cl::opt<bool> 
EnableR600StructurizeCFG(
  "r600-ir-structurize",
  cl::desc("Use StructurizeCFG IR pass"),
  cl::init(true));

// Run SROA on allocas that survive AMDGPUPromoteAlloca (see addIRPasses).
static cl::opt<bool> EnableSROA(
  "amdgpu-sroa",
  cl::desc("Run SROA after promote alloca pass"),
  cl::ReallyHidden,
  cl::init(true));

// GCN only: gate for EarlyIfConverter in addILPOpts (off by default).
static cl::opt<bool>
EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
                        cl::desc("Run early if-conversion"),
                        cl::init(false));

// R600 only: gate for the generic IfConverter in addPreSched2.
static cl::opt<bool> EnableR600IfConvert(
  "r600-if-convert",
  cl::desc("Use if conversion pass"),
  cl::ReallyHidden,
  cl::init(true));

// Option to disable vectorizer for tests.
static cl::opt<bool> EnableLoadStoreVectorizer(
  "amdgpu-load-store-vectorizer",
  cl::desc("Enable load store vectorizer"),
  cl::init(true),
  cl::Hidden);

// Option to control global loads scalarization
static cl::opt<bool> ScalarizeGlobal(
  "amdgpu-scalarize-global-loads",
  cl::desc("Enable global load scalarization"),
  cl::init(false),
  cl::Hidden);

// Option to run internalize pass.
static cl::opt<bool> InternalizeSymbols(
  "amdgpu-internalize-symbols",
  cl::desc("Enable elimination of non-kernel functions and unused globals"),
  cl::init(false),
  cl::Hidden);

/// Entry point called by the target registry: registers both target machines
/// (R600 and GCN) and the machine passes this backend contributes, so that
/// they are known to the PassRegistry (required for -run-pass, -print-after,
/// etc.).
extern "C" void LLVMInitializeAMDGPUTarget() {
  // Register the target
  RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
  RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());

  PassRegistry *PR = PassRegistry::getPassRegistry();
  initializeSILowerI1CopiesPass(*PR);
  initializeSIFixSGPRCopiesPass(*PR);
  initializeSIFixVGPRCopiesPass(*PR);
  initializeSIFoldOperandsPass(*PR);
  initializeSIShrinkInstructionsPass(*PR);
  initializeSIFixControlFlowLiveIntervalsPass(*PR);
  initializeSILoadStoreOptimizerPass(*PR);
  initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
  initializeAMDGPUAnnotateUniformValuesPass(*PR);
  initializeAMDGPULowerIntrinsicsPass(*PR);
  initializeAMDGPUPromoteAllocaPass(*PR);
  initializeAMDGPUCodeGenPreparePass(*PR);
  initializeAMDGPUUnifyMetadataPass(*PR);
  initializeSIAnnotateControlFlowPass(*PR);
  initializeSIInsertWaitsPass(*PR);
  initializeSIWholeQuadModePass(*PR);
  initializeSILowerControlFlowPass(*PR);
  initializeSIInsertSkipsPass(*PR);
  initializeSIDebuggerInsertNopsPass(*PR);
  initializeSIOptimizeExecMaskingPass(*PR);
}

// Both subtargets share one TLOF implementation; TT is currently unused.
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  return llvm::make_unique<AMDGPUTargetObjectFile>();
}

// Factory for the R600 custom scheduling strategy (registered below).
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
}

// Factory for the experimental SI machine scheduler (registered below).
static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
  return new SIScheduleDAGMI(C);
}

// Default GCN scheduler: generic live-interval scheduling with an
// occupancy-aware strategy, plus load/store clustering mutations.
static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
    new ScheduleDAGMILive(C,
                          llvm::make_unique<GCNMaxOccupancySchedStrategy>(C));
  // Cluster adjacent memory operations so they can later be combined.
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}

// Make the schedulers above selectable via -misched=<name>.
static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
                  createR600MachineScheduler);

static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
                createSIMachineScheduler);

static MachineSchedRegistry
GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
                             "Run GCN scheduler to maximize occupancy",
                             createGCNMaxOccupancyMachineScheduler);

/// Data layout string for the module, selected by architecture.
static StringRef computeDataLayout(const Triple &TT) {
  if (TT.getArch() == Triple::r600) {
    // 32-bit pointers.
    return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
           "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
  }

  // 32-bit private, local, and region pointers. 64-bit global, constant and
  // flat.
  return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
         "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
         "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
}

/// Returns \p GPU if non-empty, otherwise a per-triple default CPU name.
LLVM_READNONE
static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
  if (!GPU.empty())
    return GPU;

  // HSA only supports CI+, so change the default GPU to a CI for HSA.
  if (TT.getArch() == Triple::amdgcn)
    return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti";

  return "r600";
}

static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
  // The AMDGPU toolchain only supports generating shared objects, so we
  // must always use PIC.
  return Reloc::PIC_;
}

AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                         StringRef CPU, StringRef FS,
                                         TargetOptions Options,
                                         Optional<Reloc::Model> RM,
                                         CodeModel::Model CM,
                                         CodeGenOpt::Level OptLevel)
  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
                      FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
    TLOF(createTLOF(getTargetTriple())) {
  initAsmInfo();
}

AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;

/// Per-function GPU name: the function's "target-cpu" attribute if set,
/// otherwise the target machine's CPU. An attribute with kind None here means
/// the function carried no "target-cpu" attribute.
StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
  Attribute GPUAttr = F.getFnAttribute("target-cpu");
  return GPUAttr.hasAttribute(Attribute::None) ?
    getTargetCPU() : GPUAttr.getValueAsString();
}

/// Per-function feature string: "target-features" attribute if set, otherwise
/// the target machine's feature string.
StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
  Attribute FSAttr = F.getFnAttribute("target-features");

  return FSAttr.hasAttribute(Attribute::None) ?
    getTargetFeatureString() :
    FSAttr.getValueAsString();
}

/// Hook AMDGPU-specific IR passes into the optimizer pipeline built by
/// PassManagerBuilder (used by opt/clang middle-end, not the codegen
/// pipeline).
void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
  // Internalization is opt-in (flag), and only done for optimized amdgcn
  // builds.
  bool Internalize = InternalizeSymbols &&
                     (getOptLevel() > CodeGenOpt::None) &&
                     (getTargetTriple().getArch() == Triple::amdgcn);
  Builder.addExtension(
    PassManagerBuilder::EP_ModuleOptimizerEarly,
    [Internalize](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
      PM.add(createAMDGPUUnifyMetadataPass());
      if (Internalize) {
        // The predicate tells InternalizePass which symbols must be
        // *preserved* (kept external): returning true keeps the symbol,
        // returning false allows it to be internalized (and then removed by
        // GlobalDCE below if dead).
        PM.add(createInternalizePass([=](const GlobalValue &GV) -> bool {
          if (const Function *F = dyn_cast<Function>(&GV)) {
            // Declarations and kernel/shader entry points stay external.
            if (F->isDeclaration())
              return true;
            switch (F->getCallingConv()) {
            default:
              return false;
            case CallingConv::AMDGPU_VS:
            case CallingConv::AMDGPU_GS:
            case CallingConv::AMDGPU_PS:
            case CallingConv::AMDGPU_CS:
            case CallingConv::AMDGPU_KERNEL:
            case CallingConv::SPIR_KERNEL:
              return true;
            }
          }
          // Non-function globals: keep only those that still have uses.
          return !GV.use_empty();
        }));
        // Remove anything the internalizer just made dead.
        PM.add(createGlobalDCEPass());
      }
    });
}

//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
//===----------------------------------------------------------------------===//

R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                     StringRef CPU, StringRef FS,
                                     TargetOptions Options,
                                     Optional<Reloc::Model> RM,
                                     CodeModel::Model CM, CodeGenOpt::Level OL)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
  // R600 codegen requires structured control flow.
  setRequiresStructuredCFG(true);
}

/// Returns (creating and caching on first use) the subtarget for \p F, keyed
/// by the function's GPU name plus feature string.
const R600Subtarget *R600TargetMachine::getSubtargetImpl(
  const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

  SmallString<128> SubtargetKey(GPU);
  SubtargetKey.append(FS);

  auto &I = SubtargetMap[SubtargetKey];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
  }

  return I.get();
}

//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//

#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {

/// GlobalISel accessor that actually owns the four GlobalISel components;
/// used only when LLVM is built with GlobalISel support.
struct SIGISelActualAccessor : public GISelAccessor {
  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
  std::unique_ptr<InstructionSelector> InstSelector;
  std::unique_ptr<LegalizerInfo> Legalizer;
  std::unique_ptr<RegisterBankInfo> RegBankInfo;
  const AMDGPUCallLowering *getCallLowering() const override {
    return CallLoweringInfo.get();
  }
  const InstructionSelector *getInstructionSelector() const override {
    return InstSelector.get();
  }
  const LegalizerInfo *getLegalizerInfo() const override {
    return Legalizer.get();
  }
  const RegisterBankInfo *getRegBankInfo() const override {
    return RegBankInfo.get();
  }
};

} // end anonymous namespace
#endif

GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                   StringRef CPU, StringRef FS,
                                   TargetOptions Options,
                                   Optional<Reloc::Model> RM,
                                   CodeModel::Model CM, CodeGenOpt::Level OL)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}

/// Returns (creating and caching on first use) the subtarget for \p F, keyed
/// by the function's GPU name plus feature string.
const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

  SmallString<128> SubtargetKey(GPU);
  SubtargetKey.append(FS);

  auto &I = SubtargetMap[SubtargetKey];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);

#ifndef LLVM_BUILD_GLOBAL_ISEL
    // No GlobalISel in this build: install an empty accessor whose getters
    // all return null.
    GISelAccessor *GISel = new GISelAccessor();
#else
    // Wire up the GlobalISel components for this subtarget. Note the
    // InstructionSelector needs the concrete AMDGPURegisterBankInfo, hence
    // the static_cast from the base RegisterBankInfo pointer stored above.
    SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
    GISel->CallLoweringInfo.reset(
      new AMDGPUCallLowering(*I->getTargetLowering()));
    GISel->Legalizer.reset(new AMDGPULegalizerInfo());

    GISel->RegBankInfo.reset(new AMDGPURegisterBankInfo(*I->getRegisterInfo()));
    GISel->InstSelector.reset(new AMDGPUInstructionSelector(*I,
      *static_cast<AMDGPURegisterBankInfo*>(GISel->RegBankInfo.get())));
#endif

    // NOTE(review): GISel is heap-allocated and handed off here — presumably
    // the subtarget takes ownership; confirm against SISubtarget.
    I->setGISelAccessor(*GISel);
  }

  // Applied on every lookup so a flag change is reflected even for cached
  // subtargets.
  I->setScalarizeGlobalBehavior(ScalarizeGlobal);

  return I.get();
}

//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup
//===----------------------------------------------------------------------===//

namespace {

/// Pass pipeline configuration shared by the R600 and GCN targets.
class AMDGPUPassConfig : public TargetPassConfig {
public:
  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {
    // Exceptions and StackMaps are not supported, so these passes will never do
    // anything.
    disablePass(&StackMapLivenessID);
    disablePass(&FuncletLayoutID);
  }

  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }

  // Generic live-interval scheduler plus load/store clustering; used unless a
  // subclass overrides it (R600 and GCN both do).
  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    return DAG;
  }

  void addEarlyCSEOrGVNPass();
  void addStraightLineScalarOptimizationPasses();
  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addGCPasses() override;
};

/// Pass pipeline for the R600 family (R600 -> Cayman).
class R600PassConfig final : public AMDGPUPassConfig {
public:
  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) {}

  ScheduleDAGInstrs *createMachineScheduler(
    MachineSchedContext *C) const override {
    return createR600MachineScheduler(C);
  }

  bool addPreISel() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

/// Pass pipeline for GCN (SI and newer).
class GCNPassConfig final : public AMDGPUPassConfig {
public:
  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) {}

  GCNTargetMachine &getGCNTargetMachine() const {
    return getTM<GCNTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override;

  bool addPreISel() override;
  void addMachineSSAOptimization() override;
  bool addILPOpts() override;
  bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
#endif
  void addFastRegAlloc(FunctionPass
                       *RegAllocPass) override;
  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

} // end anonymous namespace

TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
  return TargetIRAnalysis([this](const Function &F) {
    return TargetTransformInfo(AMDGPUTTIImpl(this, F));
  });
}

// GVN subsumes EarlyCSE but is more expensive, so only run it at -O3.
void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
  if (getOptLevel() == CodeGenOpt::Aggressive)
    addPass(createGVNPass());
  else
    addPass(createEarlyCSEPass());
}

void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
  // EarlyCSE can reuse.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(createEarlyCSEPass());
}

void AMDGPUPassConfig::addIRPasses() {
  // There is no reason to run these.
  disablePass(&StackMapLivenessID);
  disablePass(&FuncletLayoutID);
  disablePass(&PatchableFunctionID);

  addPass(createAMDGPULowerIntrinsicsPass());

  // Function calls are not supported, so make sure we inline everything.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerLegacyPass());
  // We need to add the barrier noop pass, otherwise adding the function
  // inlining pass will cause all of the PassConfigs passes to be run
  // one function at a time, which means if we have a module with two
  // functions, then we will generate code for the first function
  // without ever running any passes on the second.
  addPass(createBarrierNoopPass());

  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();

  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
    // TODO: May want to move later or split into an early and late one.

    addPass(createAMDGPUCodeGenPreparePass(
      static_cast<const GCNTargetMachine *>(&TM)));
  }

  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
  addPass(createAMDGPUOpenCLImageTypeLoweringPass());

  if (TM.getOptLevel() > CodeGenOpt::None) {
    addPass(createInferAddressSpacesPass());
    addPass(createAMDGPUPromoteAlloca(&TM));

    // SROA cleans up allocas that promote-alloca left behind.
    if (EnableSROA)
      addPass(createSROAPass());

    addStraightLineScalarOptimizationPasses();
  }

  TargetPassConfig::addIRPasses();

  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  //   %0 = add %a, %b
  //   %1 = add %b, %a
  //
  // and
  //
  //   %0 = shl nsw %a, 2
  //   %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  if (getOptLevel() != CodeGenOpt::None)
    addEarlyCSEOrGVNPass();
}

void AMDGPUPassConfig::addCodeGenPrepare() {
  TargetPassConfig::addCodeGenPrepare();

  if (EnableLoadStoreVectorizer)
    addPass(createLoadStoreVectorizerPass());
}

bool AMDGPUPassConfig::addPreISel() {
  addPass(createFlattenCFGPass());
  return false;
}

bool AMDGPUPassConfig::addInstSelector() {
  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
  return false;
}

bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}

//===----------------------------------------------------------------------===//
// R600 Pass Setup
//===----------------------------------------------------------------------===//

bool R600PassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  if (EnableR600StructurizeCFG)
    addPass(createStructurizeCFGPass());
  return false;
}

void R600PassConfig::addPreRegAlloc() {
  addPass(createR600VectorRegMerger(*TM));
}

// Note: the 'false' second argument to addPass below suppresses verification
// after these passes.
void R600PassConfig::addPreSched2() {
  addPass(createR600EmitClauseMarkers(), false);
  if (EnableR600IfConvert)
    addPass(&IfConverterID, false);
  addPass(createR600ClauseMergePass(*TM), false);
}

void R600PassConfig::addPreEmitPass() {
  addPass(createAMDGPUCFGStructurizerPass(), false);
  addPass(createR600ExpandSpecialInstrsPass(*TM), false);
  addPass(&FinalizeMachineBundlesID, false);
  addPass(createR600Packetizer(*TM), false);
  addPass(createR600ControlFlowFinalizer(*TM), false);
}

TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new R600PassConfig(this, PM);
}

//===----------------------------------------------------------------------===//
// GCN Pass Setup
//===----------------------------------------------------------------------===//
// Per-function scheduler choice: the experimental SI scheduler when the
// subtarget enables it, otherwise the max-occupancy scheduler.
ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
  MachineSchedContext *C) const {
  const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
  if (ST.enableSIScheduler())
    return createSIMachineScheduler(C);
  return createGCNMaxOccupancyMachineScheduler(C);
}

bool GCNPassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  // FIXME: We need to run a pass to propagate the attributes when calls are
  // supported.
  addPass(&AMDGPUAnnotateKernelFeaturesID);
  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
  addPass(createSinkingPass());
  addPass(createSITypeRewriter());
  addPass(createAMDGPUAnnotateUniformValues());
  addPass(createSIAnnotateControlFlowPass());

  return false;
}

void GCNPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();

  // We want to fold operands after PeepholeOptimizer has run (or as part of
  // it), because it will eliminate extra copies making it easier to fold the
  // real source operand. We want to eliminate dead instructions after, so that
  // we see fewer uses of the copies. We then need to clean up the dead
  // instructions leftover after the operands are folded as well.
  //
  // XXX - Can we get away without running DeadMachineInstructionElim again?
  addPass(&SIFoldOperandsID);
  addPass(&DeadMachineInstructionElimID);
  addPass(&SILoadStoreOptimizerID);
}

bool GCNPassConfig::addILPOpts() {
  if (EnableEarlyIfConversion)
    addPass(&EarlyIfConverterID);

  TargetPassConfig::addILPOpts();
  return false;
}

bool GCNPassConfig::addInstSelector() {
  AMDGPUPassConfig::addInstSelector();
  addPass(createSILowerI1CopiesPass());
  addPass(&SIFixSGPRCopiesID);
  return false;
}

#ifdef LLVM_BUILD_GLOBAL_ISEL
// GlobalISel pipeline stages; each returns false to indicate success.
bool GCNPassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}

bool GCNPassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}

bool GCNPassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}

bool GCNPassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect());
  return false;
}

#endif

void GCNPassConfig::addPreRegAlloc() {
  addPass(createSIShrinkInstructionsPass());
  addPass(createSIWholeQuadModePass());
}

void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  // FIXME: We have to disable the verifier here because of PHIElimination +
  // TwoAddressInstructions disabling it.

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  TargetPassConfig::addFastRegAlloc(RegAllocPass);
}

void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  // This needs to be run directly before register allocation because earlier
  // passes might recompute live intervals.
  insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}

void GCNPassConfig::addPostRegAlloc() {
  addPass(&SIFixVGPRCopiesID);
  addPass(&SIOptimizeExecMaskingID);
  TargetPassConfig::addPostRegAlloc();
}

void GCNPassConfig::addPreSched2() {
}

void GCNPassConfig::addPreEmitPass() {
  // The hazard recognizer that runs as part of the post-ra scheduler does not
  // guarantee to be able to handle all hazards correctly. This is because if
  // there are multiple scheduling regions in a basic block, the regions are
  // scheduled bottom up, so when we begin to schedule a region we don't know
  // what instructions were emitted directly before it.
  //
  // Here we add a stand-alone hazard recognizer pass which can handle all
  // cases.
  addPass(&PostRAHazardRecognizerID);

  addPass(createSIInsertWaitsPass());
  addPass(createSIShrinkInstructionsPass());
  addPass(&SIInsertSkipsPassID);
  addPass(createSIDebuggerInsertNopsPass());
  addPass(&BranchRelaxationPassID);
}

TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new GCNPassConfig(this, PM);
}