//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief The AMDGPU target machine contains all of the hardware specific
/// information needed to emit code for R600 and SI GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUCallLowering.h"
#include "AMDGPUTargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "GCNSchedStrategy.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineScheduler.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "SIMachineScheduler.h"
#include "llvm/CodeGen/GlobalISel/IRTranslator.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/AlwaysInliner.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Vectorize.h"

using namespace llvm;

static cl::opt<bool> EnableR600StructurizeCFG(
  "r600-ir-structurize",
  cl::desc("Use StructurizeCFG IR pass"),
  cl::init(true));

static cl::opt<bool> EnableSROA(
  "amdgpu-sroa",
  cl::desc("Run SROA after promote alloca pass"),
  cl::ReallyHidden,
  cl::init(true));

static cl::opt<bool> EnableR600IfConvert(
  "r600-if-convert",
  cl::desc("Use if conversion pass"),
  cl::ReallyHidden,
  cl::init(true));

// Option to disable the vectorizer for tests.
static cl::opt<bool> EnableLoadStoreVectorizer(
  "amdgpu-load-store-vectorizer",
  cl::desc("Enable load store vectorizer"),
  cl::init(true),
  cl::Hidden);

// Option to control global load scalarization.
static cl::opt<bool> ScalarizeGlobal(
  "amdgpu-scalarize-global-loads",
  cl::desc("Enable global load scalarization"),
  cl::init(false),
  cl::Hidden);
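// The flags above are ordinary cl::opt options, so when this target is linked
// in they can be toggled from the llc/opt command line, e.g. (a sketch):
//   llc -march=amdgcn -mcpu=tahiti -amdgpu-sroa=0 input.ll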
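// Entry point expected by the target registry; reached via
// InitializeAllTargets() (or the C API) to register the two target machines
// and initialize the AMDGPU-specific machine passes with the PassRegistry.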
extern "C" void LLVMInitializeAMDGPUTarget() {
  // Register the targets.
  RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
  RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());

  PassRegistry *PR = PassRegistry::getPassRegistry();
  initializeSILowerI1CopiesPass(*PR);
  initializeSIFixSGPRCopiesPass(*PR);
  initializeSIFoldOperandsPass(*PR);
  initializeSIShrinkInstructionsPass(*PR);
  initializeSIFixControlFlowLiveIntervalsPass(*PR);
  initializeSILoadStoreOptimizerPass(*PR);
  initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
  initializeAMDGPUAnnotateUniformValuesPass(*PR);
  initializeAMDGPUPromoteAllocaPass(*PR);
  initializeAMDGPUCodeGenPreparePass(*PR);
  initializeAMDGPUUnifyMetadataPass(*PR);
  initializeSIAnnotateControlFlowPass(*PR);
  initializeSIInsertWaitsPass(*PR);
  initializeSIWholeQuadModePass(*PR);
  initializeSILowerControlFlowPass(*PR);
  initializeSIInsertSkipsPass(*PR);
  initializeSIDebuggerInsertNopsPass(*PR);
  initializeSIOptimizeExecMaskingPass(*PR);
}

static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  return make_unique<AMDGPUTargetObjectFile>();
}

static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
}

static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
  return new SIScheduleDAGMI(C);
}

static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
  ScheduleDAGMILive *DAG =
      new ScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
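  // Cluster loads (and stores) to adjacent addresses during scheduling so
  // that later passes have a better chance of combining them into wider
  // accesses.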
  DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
  DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
  return DAG;
}

static MachineSchedRegistry
R600SchedRegistry("r600", "Run R600's custom scheduler",
                  createR600MachineScheduler);

static MachineSchedRegistry
SISchedRegistry("si", "Run SI's custom scheduler",
                createSIMachineScheduler);

static MachineSchedRegistry
GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
                             "Run GCN scheduler to maximize occupancy",
                             createGCNMaxOccupancyMachineScheduler);
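// The names registered above can be selected at run time through the machine
// scheduler's -misched option, e.g. (a sketch):
//   llc -march=amdgcn -misched=gcn-max-occupancy input.ll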

static StringRef computeDataLayout(const Triple &TT) {
  if (TT.getArch() == Triple::r600) {
    // 32-bit pointers.
    return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
           "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
  }

  // 32-bit private, local, and region pointers. 64-bit global, constant and
  // flat.
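  //
  // In datalayout syntax: "e" means little-endian; "p:32:32" gives the size
  // and ABI alignment of address space 0 pointers; "pN:S:A" does the same for
  // address space N; "iN:A" and "vN:A" set integer and vector alignments; and
  // "n32:64" lists the natively supported integer widths.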
  return "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32"
         "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
         "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64";
}

LLVM_READNONE
static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
  if (!GPU.empty())
    return GPU;

  // HSA only supports CI+, so change the default GPU to a CI for HSA.
  if (TT.getArch() == Triple::amdgcn)
    return (TT.getOS() == Triple::AMDHSA) ? "kaveri" : "tahiti";

  return "r600";
}

static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
  // The AMDGPU toolchain only supports generating shared objects, so we
  // must always use PIC.
  return Reloc::PIC_;
}

AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                         StringRef CPU, StringRef FS,
                                         TargetOptions Options,
                                         Optional<Reloc::Model> RM,
                                         CodeModel::Model CM,
                                         CodeGenOpt::Level OptLevel)
  : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
                      FS, Options, getEffectiveRelocModel(RM), CM, OptLevel),
    TLOF(createTLOF(getTargetTriple())),
    IntrinsicInfo() {
  initAsmInfo();
}

AMDGPUTargetMachine::~AMDGPUTargetMachine() { }

StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
  Attribute GPUAttr = F.getFnAttribute("target-cpu");
  return GPUAttr.hasAttribute(Attribute::None) ?
    getTargetCPU() : GPUAttr.getValueAsString();
}

StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
  Attribute FSAttr = F.getFnAttribute("target-features");

  return FSAttr.hasAttribute(Attribute::None) ?
    getTargetFeatureString() :
    FSAttr.getValueAsString();
}
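
// Both of the hooks above fall back to the TargetMachine-wide defaults when a
// function carries no explicit attributes. In IR, the per-function overrides
// look roughly like this (hypothetical example):
//
//   define void @f() #0 { ... }
//   attributes #0 = { "target-cpu"="fiji" "target-features"="+fp32-denormals" }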

void AMDGPUTargetMachine::addEarlyAsPossiblePasses(PassManagerBase &PM) {
  PM.add(llvm::createAMDGPUUnifyMetadataPass());
}

//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
//===----------------------------------------------------------------------===//

R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                     StringRef CPU, StringRef FS,
                                     TargetOptions Options,
                                     Optional<Reloc::Model> RM,
                                     CodeModel::Model CM, CodeGenOpt::Level OL)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
  setRequiresStructuredCFG(true);
}

const R600Subtarget *R600TargetMachine::getSubtargetImpl(
  const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

  SmallString<128> SubtargetKey(GPU);
  SubtargetKey.append(FS);

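  // Subtargets are cached per (GPU, feature string) key, so functions with
  // identical target attributes share a single subtarget instance.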
  auto &I = SubtargetMap[SubtargetKey];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
  }

  return I.get();
}

//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//

#ifdef LLVM_BUILD_GLOBAL_ISEL
namespace {
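// Provides the real GlobalISel call lowering when GlobalISel is compiled in.
// When it is not, getSubtargetImpl() below installs the base GISelAccessor,
// whose getters simply return null.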
struct SIGISelActualAccessor : public GISelAccessor {
  std::unique_ptr<AMDGPUCallLowering> CallLoweringInfo;
  const AMDGPUCallLowering *getCallLowering() const override {
    return CallLoweringInfo.get();
  }
};
} // End anonymous namespace.
#endif

GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                   StringRef CPU, StringRef FS,
                                   TargetOptions Options,
                                   Optional<Reloc::Model> RM,
                                   CodeModel::Model CM, CodeGenOpt::Level OL)
  : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}

const SISubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
  StringRef GPU = getGPUName(F);
  StringRef FS = getFeatureString(F);

  SmallString<128> SubtargetKey(GPU);
  SubtargetKey.append(FS);

  auto &I = SubtargetMap[SubtargetKey];
  if (!I) {
    // This needs to be done before we create a new subtarget since any
    // creation will depend on the TM and the code generation flags on the
    // function that reside in TargetOptions.
    resetTargetOptions(F);
    I = llvm::make_unique<SISubtarget>(TargetTriple, GPU, FS, *this);

#ifndef LLVM_BUILD_GLOBAL_ISEL
    GISelAccessor *GISel = new GISelAccessor();
#else
    SIGISelActualAccessor *GISel = new SIGISelActualAccessor();
    GISel->CallLoweringInfo.reset(
      new AMDGPUCallLowering(*I->getTargetLowering()));
#endif

    I->setGISelAccessor(*GISel);
  }

  I->setScalarizeGlobalBehavior(ScalarizeGlobal);

  return I.get();
}

//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup
//===----------------------------------------------------------------------===//

namespace {

class AMDGPUPassConfig : public TargetPassConfig {
public:
  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {

    // Exceptions and StackMaps are not supported, so these passes will never do
    // anything.
    disablePass(&StackMapLivenessID);
    disablePass(&FuncletLayoutID);
  }

  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    return DAG;
  }

  void addEarlyCSEOrGVNPass();
  void addStraightLineScalarOptimizationPasses();
  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addGCPasses() override;
};

class R600PassConfig final : public AMDGPUPassConfig {
public:
  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  ScheduleDAGInstrs *createMachineScheduler(
    MachineSchedContext *C) const override {
    return createR600MachineScheduler(C);
  }

  bool addPreISel() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

class GCNPassConfig final : public AMDGPUPassConfig {
public:
  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  GCNTargetMachine &getGCNTargetMachine() const {
    return getTM<GCNTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override;

  void addIRPasses() override;
  bool addPreISel() override;
  void addMachineSSAOptimization() override;
  bool addInstSelector() override;
#ifdef LLVM_BUILD_GLOBAL_ISEL
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
#endif
  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

} // End of anonymous namespace

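// Hooks the AMDGPU TTI implementation into the TargetTransformInfo analysis,
// which is how middle-end passes (vectorizers, unrolling, etc.) query
// target-specific costs and preferences.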
TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
  return TargetIRAnalysis([this](const Function &F) {
    return TargetTransformInfo(AMDGPUTTIImpl(this, F));
  });
}

void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
  if (getOptLevel() == CodeGenOpt::Aggressive)
    addPass(createGVNPass());
  else
    addPass(createEarlyCSEPass());
}

void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
  // EarlyCSE can reuse.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(createEarlyCSEPass());
}

void AMDGPUPassConfig::addIRPasses() {
  // There is no reason to run these.
  disablePass(&StackMapLivenessID);
  disablePass(&FuncletLayoutID);
  disablePass(&PatchableFunctionID);

  // Function calls are not supported, so make sure we inline everything.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerLegacyPass());
  // We need to add the barrier noop pass; otherwise adding the function
  // inlining pass will cause all of the PassConfig's passes to be run one
  // function at a time, which means that if we have a module with two
  // functions, we will generate code for the first function without ever
  // running any passes on the second.
  addPass(createBarrierNoopPass());

  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
  addPass(createAMDGPUOpenCLImageTypeLoweringPass());

  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
  if (TM.getOptLevel() > CodeGenOpt::None) {
    addPass(createAMDGPUPromoteAlloca(&TM));

    if (EnableSROA)
      addPass(createSROAPass());

    addStraightLineScalarOptimizationPasses();
  }

  TargetPassConfig::addIRPasses();

  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  //   %0 = add %a, %b
  //   %1 = add %b, %a
  //
  // and
  //
  //   %0 = shl nsw %a, 2
  //   %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  if (getOptLevel() != CodeGenOpt::None)
    addEarlyCSEOrGVNPass();
}

void AMDGPUPassConfig::addCodeGenPrepare() {
  TargetPassConfig::addCodeGenPrepare();

  if (EnableLoadStoreVectorizer)
    addPass(createLoadStoreVectorizerPass());
}

bool AMDGPUPassConfig::addPreISel() {
  addPass(createFlattenCFGPass());
  return false;
}

bool AMDGPUPassConfig::addInstSelector() {
  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine(), getOptLevel()));
  return false;
}

bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}

//===----------------------------------------------------------------------===//
// R600 Pass Setup
//===----------------------------------------------------------------------===//

bool R600PassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  if (EnableR600StructurizeCFG)
    addPass(createStructurizeCFGPass());
  return false;
}

void R600PassConfig::addPreRegAlloc() {
  addPass(createR600VectorRegMerger(*TM));
}

void R600PassConfig::addPreSched2() {
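  // The extra 'false' argument disables the machine verifier run that addPass
  // would otherwise schedule after each of these passes.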
  addPass(createR600EmitClauseMarkers(), false);
  if (EnableR600IfConvert)
    addPass(&IfConverterID, false);
  addPass(createR600ClauseMergePass(*TM), false);
}

void R600PassConfig::addPreEmitPass() {
  addPass(createAMDGPUCFGStructurizerPass(), false);
  addPass(createR600ExpandSpecialInstrsPass(*TM), false);
  addPass(&FinalizeMachineBundlesID, false);
  addPass(createR600Packetizer(*TM), false);
  addPass(createR600ControlFlowFinalizer(*TM), false);
}

TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new R600PassConfig(this, PM);
}

//===----------------------------------------------------------------------===//
// GCN Pass Setup
//===----------------------------------------------------------------------===//

ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
  MachineSchedContext *C) const {
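  // The experimental SI machine scheduler is opt-in via the subtarget (e.g.
  // through the si-scheduler feature); the max-occupancy scheduler is the
  // default.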
  const SISubtarget &ST = C->MF->getSubtarget<SISubtarget>();
  if (ST.enableSIScheduler())
    return createSIMachineScheduler(C);
  return createGCNMaxOccupancyMachineScheduler(C);
}

bool GCNPassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  // FIXME: We need to run a pass to propagate the attributes when calls are
  // supported.
  addPass(&AMDGPUAnnotateKernelFeaturesID);
  addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
  addPass(createSinkingPass());
  addPass(createSITypeRewriter());
  addPass(createAMDGPUAnnotateUniformValues());
  addPass(createSIAnnotateControlFlowPass());

  return false;
}

void GCNPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();

  // We want to fold operands after PeepholeOptimizer has run (or as part of
  // it), because it will eliminate extra copies, making it easier to fold the
  // real source operand. We want to eliminate dead instructions after, so that
  // we see fewer uses of the copies. We then need to clean up the dead
  // instructions leftover after the operands are folded as well.
  //
  // XXX - Can we get away without running DeadMachineInstructionElim again?
  addPass(&SIFoldOperandsID);
  addPass(&DeadMachineInstructionElimID);
  addPass(&SILoadStoreOptimizerID);
}

void GCNPassConfig::addIRPasses() {
  // TODO: May want to move later or split into an early and late one.
  addPass(createAMDGPUCodeGenPreparePass(&getGCNTargetMachine()));

  AMDGPUPassConfig::addIRPasses();
}

bool GCNPassConfig::addInstSelector() {
  AMDGPUPassConfig::addInstSelector();
  addPass(createSILowerI1CopiesPass());
  addPass(&SIFixSGPRCopiesID);
  return false;
}

#ifdef LLVM_BUILD_GLOBAL_ISEL
bool GCNPassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}

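// The remaining GlobalISel stages are not implemented for AMDGPU yet. The
// overrides below return false ("no error") so the experimental pipeline can
// still be exercised up to the parts that do exist.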
bool GCNPassConfig::addLegalizeMachineIR() {
  return false;
}

bool GCNPassConfig::addRegBankSelect() {
  return false;
}

bool GCNPassConfig::addGlobalInstructionSelect() {
  return false;
}
#endif

void GCNPassConfig::addPreRegAlloc() {
  addPass(createSIShrinkInstructionsPass());
  addPass(createSIWholeQuadModePass());
}

void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  // FIXME: We have to disable the verifier here because of PHIElimination +
  // TwoAddressInstructions disabling it.

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions; otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  TargetPassConfig::addFastRegAlloc(RegAllocPass);
}

void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  // This needs to be run directly before register allocation because earlier
  // passes might recompute live intervals.
  insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions; otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}

void GCNPassConfig::addPostRegAlloc() {
  addPass(&SIOptimizeExecMaskingID);
  TargetPassConfig::addPostRegAlloc();
}

void GCNPassConfig::addPreSched2() {
}

void GCNPassConfig::addPreEmitPass() {
  // The hazard recognizer that runs as part of the post-RA scheduler does not
  // guarantee that it can handle all hazards correctly. This is because, if
  // there are multiple scheduling regions in a basic block, the regions are
  // scheduled bottom up, so when we begin to schedule a region we don't know
  // what instructions were emitted directly before it.
  //
  // Here we add a stand-alone hazard recognizer pass which can handle all
  // cases.
  addPass(&PostRAHazardRecognizerID);

  addPass(createSIInsertWaitsPass());
  addPass(createSIShrinkInstructionsPass());
  addPass(&SIInsertSkipsPassID);
  addPass(createSIDebuggerInsertNopsPass());
  addPass(&BranchRelaxationPassID);
}

TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new GCNPassConfig(this, PM);
}