1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// The AMDGPU target machine contains all of the hardware-specific
/// information needed to emit code for R600 and SI GPUs.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPUTargetMachine.h"
16 #include "AMDGPU.h"
17 #include "AMDGPUAliasAnalysis.h"
18 #include "AMDGPUExportClustering.h"
19 #include "AMDGPUMacroFusion.h"
20 #include "AMDGPUTargetObjectFile.h"
21 #include "AMDGPUTargetTransformInfo.h"
22 #include "GCNIterativeScheduler.h"
23 #include "GCNSchedStrategy.h"
24 #include "R600MachineScheduler.h"
25 #include "SIMachineFunctionInfo.h"
26 #include "SIMachineScheduler.h"
27 #include "TargetInfo/AMDGPUTargetInfo.h"
28 #include "llvm/Analysis/CGSCCPassManager.h"
29 #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
30 #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
31 #include "llvm/CodeGen/GlobalISel/Legalizer.h"
32 #include "llvm/CodeGen/GlobalISel/Localizer.h"
33 #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
34 #include "llvm/CodeGen/MIRParser/MIParser.h"
35 #include "llvm/CodeGen/Passes.h"
36 #include "llvm/CodeGen/RegAllocRegistry.h"
37 #include "llvm/CodeGen/TargetPassConfig.h"
38 #include "llvm/IR/LegacyPassManager.h"
39 #include "llvm/IR/PassManager.h"
40 #include "llvm/InitializePasses.h"
41 #include "llvm/Passes/PassBuilder.h"
42 #include "llvm/Support/TargetRegistry.h"
43 #include "llvm/Transforms/IPO.h"
44 #include "llvm/Transforms/IPO/AlwaysInliner.h"
45 #include "llvm/Transforms/IPO/GlobalDCE.h"
46 #include "llvm/Transforms/IPO/Internalize.h"
47 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
48 #include "llvm/Transforms/Scalar.h"
49 #include "llvm/Transforms/Scalar/GVN.h"
50 #include "llvm/Transforms/Scalar/InferAddressSpaces.h"
51 #include "llvm/Transforms/Utils.h"
52 #include "llvm/Transforms/Utils/SimplifyLibCalls.h"
53 #include "llvm/Transforms/Vectorize.h"
54 
55 using namespace llvm;
56 
57 namespace {
58 class SGPRRegisterRegAlloc : public RegisterRegAllocBase<SGPRRegisterRegAlloc> {
59 public:
60   SGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
61     : RegisterRegAllocBase(N, D, C) {}
62 };
63 
64 class VGPRRegisterRegAlloc : public RegisterRegAllocBase<VGPRRegisterRegAlloc> {
65 public:
66   VGPRRegisterRegAlloc(const char *N, const char *D, FunctionPassCtor C)
67     : RegisterRegAllocBase(N, D, C) {}
68 };
69 
70 static bool onlyAllocateSGPRs(const TargetRegisterInfo &TRI,
71                               const TargetRegisterClass &RC) {
72   return static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
73 }
74 
75 static bool onlyAllocateVGPRs(const TargetRegisterInfo &TRI,
76                               const TargetRegisterClass &RC) {
77   return !static_cast<const SIRegisterInfo &>(TRI).isSGPRClass(&RC);
78 }
79 
80 
/// Dummy default pass factory for the -{sgpr|vgpr}-regalloc=... command line
/// options.
static FunctionPass *useDefaultRegisterAllocator() { return nullptr; }

/// The dummy default pass factory is used to detect whether the register
/// allocator has been overridden on the command line.
86 static llvm::once_flag InitializeDefaultSGPRRegisterAllocatorFlag;
87 static llvm::once_flag InitializeDefaultVGPRRegisterAllocatorFlag;
88 
89 static SGPRRegisterRegAlloc
90 defaultSGPRRegAlloc("default",
91                     "pick SGPR register allocator based on -O option",
92                     useDefaultRegisterAllocator);
93 
94 static cl::opt<SGPRRegisterRegAlloc::FunctionPassCtor, false,
95                RegisterPassParser<SGPRRegisterRegAlloc>>
96 SGPRRegAlloc("sgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
97              cl::desc("Register allocator to use for SGPRs"));
98 
99 static cl::opt<VGPRRegisterRegAlloc::FunctionPassCtor, false,
100                RegisterPassParser<VGPRRegisterRegAlloc>>
101 VGPRRegAlloc("vgpr-regalloc", cl::Hidden, cl::init(&useDefaultRegisterAllocator),
102              cl::desc("Register allocator to use for VGPRs"));
103 
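// The two options can be combined; for example, an illustrative invocation
//   llc -mtriple=amdgcn -sgpr-regalloc=greedy -vgpr-regalloc=fast
// allocates SGPRs with the greedy allocator and VGPRs with the fast one.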
104 
105 static void initializeDefaultSGPRRegisterAllocatorOnce() {
106   RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
107 
108   if (!Ctor) {
109     Ctor = SGPRRegAlloc;
110     SGPRRegisterRegAlloc::setDefault(SGPRRegAlloc);
111   }
112 }
113 
114 static void initializeDefaultVGPRRegisterAllocatorOnce() {
115   RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
116 
117   if (!Ctor) {
118     Ctor = VGPRRegAlloc;
119     VGPRRegisterRegAlloc::setDefault(VGPRRegAlloc);
120   }
121 }
122 
123 static FunctionPass *createBasicSGPRRegisterAllocator() {
124   return createBasicRegisterAllocator(onlyAllocateSGPRs);
125 }
126 
127 static FunctionPass *createGreedySGPRRegisterAllocator() {
128   return createGreedyRegisterAllocator(onlyAllocateSGPRs);
129 }
130 
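// Note: the second argument to createFastRegisterAllocator is ClearVirtRegs.
// The SGPR allocator runs first and must leave the still-virtual VGPRs in
// place, so it passes false; the final VGPR allocator passes true.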
131 static FunctionPass *createFastSGPRRegisterAllocator() {
132   return createFastRegisterAllocator(onlyAllocateSGPRs, false);
133 }
134 
135 static FunctionPass *createBasicVGPRRegisterAllocator() {
136   return createBasicRegisterAllocator(onlyAllocateVGPRs);
137 }
138 
139 static FunctionPass *createGreedyVGPRRegisterAllocator() {
140   return createGreedyRegisterAllocator(onlyAllocateVGPRs);
141 }
142 
143 static FunctionPass *createFastVGPRRegisterAllocator() {
144   return createFastRegisterAllocator(onlyAllocateVGPRs, true);
145 }
146 
147 static SGPRRegisterRegAlloc basicRegAllocSGPR(
148   "basic", "basic register allocator", createBasicSGPRRegisterAllocator);
149 static SGPRRegisterRegAlloc greedyRegAllocSGPR(
150   "greedy", "greedy register allocator", createGreedySGPRRegisterAllocator);
151 
152 static SGPRRegisterRegAlloc fastRegAllocSGPR(
153   "fast", "fast register allocator", createFastSGPRRegisterAllocator);
154 
155 
156 static VGPRRegisterRegAlloc basicRegAllocVGPR(
157   "basic", "basic register allocator", createBasicVGPRRegisterAllocator);
158 static VGPRRegisterRegAlloc greedyRegAllocVGPR(
159   "greedy", "greedy register allocator", createGreedyVGPRRegisterAllocator);
160 
161 static VGPRRegisterRegAlloc fastRegAllocVGPR(
162   "fast", "fast register allocator", createFastVGPRRegisterAllocator);
163 }
164 
165 
166 static cl::opt<bool> EnableR600StructurizeCFG(
167   "r600-ir-structurize",
168   cl::desc("Use StructurizeCFG IR pass"),
169   cl::init(true));
170 
171 static cl::opt<bool> EnableSROA(
172   "amdgpu-sroa",
173   cl::desc("Run SROA after promote alloca pass"),
174   cl::ReallyHidden,
175   cl::init(true));
176 
177 static cl::opt<bool>
178 EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
179                         cl::desc("Run early if-conversion"),
180                         cl::init(false));
181 
182 static cl::opt<bool>
183 OptExecMaskPreRA("amdgpu-opt-exec-mask-pre-ra", cl::Hidden,
184             cl::desc("Run pre-RA exec mask optimizations"),
185             cl::init(true));
186 
187 static cl::opt<bool> EnableR600IfConvert(
188   "r600-if-convert",
189   cl::desc("Use if conversion pass"),
190   cl::ReallyHidden,
191   cl::init(true));
192 
// Option to disable the load/store vectorizer for tests.
194 static cl::opt<bool> EnableLoadStoreVectorizer(
195   "amdgpu-load-store-vectorizer",
196   cl::desc("Enable load store vectorizer"),
197   cl::init(true),
198   cl::Hidden);
199 
// Option to control scalarization of global loads.
201 static cl::opt<bool> ScalarizeGlobal(
202   "amdgpu-scalarize-global-loads",
203   cl::desc("Enable global load scalarization"),
204   cl::init(true),
205   cl::Hidden);
206 
207 // Option to run internalize pass.
208 static cl::opt<bool> InternalizeSymbols(
209   "amdgpu-internalize-symbols",
210   cl::desc("Enable elimination of non-kernel functions and unused globals"),
211   cl::init(false),
212   cl::Hidden);
213 
214 // Option to inline all early.
215 static cl::opt<bool> EarlyInlineAll(
216   "amdgpu-early-inline-all",
217   cl::desc("Inline all functions early"),
218   cl::init(false),
219   cl::Hidden);
220 
221 static cl::opt<bool> EnableSDWAPeephole(
222   "amdgpu-sdwa-peephole",
223   cl::desc("Enable SDWA peepholer"),
224   cl::init(true));
225 
226 static cl::opt<bool> EnableDPPCombine(
227   "amdgpu-dpp-combine",
228   cl::desc("Enable DPP combiner"),
229   cl::init(true));
230 
231 // Enable address space based alias analysis
232 static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
233   cl::desc("Enable AMDGPU Alias Analysis"),
234   cl::init(true));
235 
236 // Option to run late CFG structurizer
237 static cl::opt<bool, true> LateCFGStructurize(
238   "amdgpu-late-structurize",
239   cl::desc("Enable late CFG structurization"),
240   cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
241   cl::Hidden);
242 
243 static cl::opt<bool, true> EnableAMDGPUFunctionCallsOpt(
244   "amdgpu-function-calls",
245   cl::desc("Enable AMDGPU function call support"),
246   cl::location(AMDGPUTargetMachine::EnableFunctionCalls),
247   cl::init(true),
248   cl::Hidden);
249 
250 static cl::opt<bool, true> EnableAMDGPUFixedFunctionABIOpt(
251   "amdgpu-fixed-function-abi",
252   cl::desc("Enable all implicit function arguments"),
253   cl::location(AMDGPUTargetMachine::EnableFixedFunctionABI),
254   cl::init(false),
255   cl::Hidden);
256 
// Enable library call simplifications.
258 static cl::opt<bool> EnableLibCallSimplify(
259   "amdgpu-simplify-libcall",
260   cl::desc("Enable amdgpu library simplifications"),
261   cl::init(true),
262   cl::Hidden);
263 
264 static cl::opt<bool> EnableLowerKernelArguments(
265   "amdgpu-ir-lower-kernel-arguments",
266   cl::desc("Lower kernel argument loads in IR pass"),
267   cl::init(true),
268   cl::Hidden);
269 
270 static cl::opt<bool> EnableRegReassign(
271   "amdgpu-reassign-regs",
272   cl::desc("Enable register reassign optimizations on gfx10+"),
273   cl::init(true),
274   cl::Hidden);
275 
276 static cl::opt<bool> OptVGPRLiveRange(
277     "amdgpu-opt-vgpr-liverange",
278     cl::desc("Enable VGPR liverange optimizations for if-else structure"),
279     cl::init(true), cl::Hidden);
280 
281 // Enable atomic optimization
282 static cl::opt<bool> EnableAtomicOptimizations(
283   "amdgpu-atomic-optimizations",
284   cl::desc("Enable atomic optimizations"),
285   cl::init(false),
286   cl::Hidden);
287 
288 // Enable Mode register optimization
289 static cl::opt<bool> EnableSIModeRegisterPass(
290   "amdgpu-mode-register",
291   cl::desc("Enable mode register pass"),
292   cl::init(true),
293   cl::Hidden);
294 
// This option is used in lit tests to prevent dead-code elimination of the
// patterns being inspected.
296 static cl::opt<bool>
297 EnableDCEInRA("amdgpu-dce-in-ra",
298     cl::init(true), cl::Hidden,
299     cl::desc("Enable machine DCE inside regalloc"));
300 
301 static cl::opt<bool> EnableScalarIRPasses(
302   "amdgpu-scalar-ir-passes",
303   cl::desc("Enable scalar IR passes"),
304   cl::init(true),
305   cl::Hidden);
306 
307 static cl::opt<bool> EnableStructurizerWorkarounds(
308     "amdgpu-enable-structurizer-workarounds",
309     cl::desc("Enable workarounds for the StructurizeCFG pass"), cl::init(true),
310     cl::Hidden);
311 
312 static cl::opt<bool> EnableLDSReplaceWithPointer(
313     "amdgpu-enable-lds-replace-with-pointer",
314     cl::desc("Enable LDS replace with pointer pass"), cl::init(false),
315     cl::Hidden);
316 
317 static cl::opt<bool, true> EnableLowerModuleLDS(
318     "amdgpu-enable-lower-module-lds", cl::desc("Enable lower module lds pass"),
319     cl::location(AMDGPUTargetMachine::EnableLowerModuleLDS), cl::init(true),
320     cl::Hidden);
321 
322 static cl::opt<bool> EnablePreRAOptimizations(
323     "amdgpu-enable-pre-ra-optimizations",
324     cl::desc("Enable Pre-RA optimizations pass"), cl::init(true),
325     cl::Hidden);
326 
327 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUTarget() {
328   // Register the target
329   RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
330   RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
331 
332   PassRegistry *PR = PassRegistry::getPassRegistry();
333   initializeR600ClauseMergePassPass(*PR);
334   initializeR600ControlFlowFinalizerPass(*PR);
335   initializeR600PacketizerPass(*PR);
336   initializeR600ExpandSpecialInstrsPassPass(*PR);
337   initializeR600VectorRegMergerPass(*PR);
338   initializeGlobalISel(*PR);
339   initializeAMDGPUDAGToDAGISelPass(*PR);
340   initializeGCNDPPCombinePass(*PR);
341   initializeSILowerI1CopiesPass(*PR);
342   initializeSILowerSGPRSpillsPass(*PR);
343   initializeSIFixSGPRCopiesPass(*PR);
344   initializeSIFixVGPRCopiesPass(*PR);
345   initializeSIFoldOperandsPass(*PR);
346   initializeSIPeepholeSDWAPass(*PR);
347   initializeSIShrinkInstructionsPass(*PR);
348   initializeSIOptimizeExecMaskingPreRAPass(*PR);
349   initializeSIOptimizeVGPRLiveRangePass(*PR);
350   initializeSILoadStoreOptimizerPass(*PR);
351   initializeAMDGPUFixFunctionBitcastsPass(*PR);
352   initializeAMDGPUAlwaysInlinePass(*PR);
353   initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
354   initializeAMDGPUAnnotateUniformValuesPass(*PR);
355   initializeAMDGPUArgumentUsageInfoPass(*PR);
356   initializeAMDGPUAtomicOptimizerPass(*PR);
357   initializeAMDGPULowerKernelArgumentsPass(*PR);
358   initializeAMDGPULowerKernelAttributesPass(*PR);
359   initializeAMDGPULowerIntrinsicsPass(*PR);
360   initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
361   initializeAMDGPUPostLegalizerCombinerPass(*PR);
362   initializeAMDGPUPreLegalizerCombinerPass(*PR);
363   initializeAMDGPURegBankCombinerPass(*PR);
364   initializeAMDGPUPromoteAllocaPass(*PR);
365   initializeAMDGPUPromoteAllocaToVectorPass(*PR);
366   initializeAMDGPUCodeGenPreparePass(*PR);
367   initializeAMDGPULateCodeGenPreparePass(*PR);
368   initializeAMDGPUPropagateAttributesEarlyPass(*PR);
369   initializeAMDGPUPropagateAttributesLatePass(*PR);
370   initializeAMDGPUReplaceLDSUseWithPointerPass(*PR);
371   initializeAMDGPULowerModuleLDSPass(*PR);
372   initializeAMDGPURewriteOutArgumentsPass(*PR);
373   initializeAMDGPUUnifyMetadataPass(*PR);
374   initializeSIAnnotateControlFlowPass(*PR);
375   initializeSIInsertHardClausesPass(*PR);
376   initializeSIInsertWaitcntsPass(*PR);
377   initializeSIModeRegisterPass(*PR);
378   initializeSIWholeQuadModePass(*PR);
379   initializeSILowerControlFlowPass(*PR);
380   initializeSIPreEmitPeepholePass(*PR);
381   initializeSILateBranchLoweringPass(*PR);
382   initializeSIMemoryLegalizerPass(*PR);
383   initializeSIOptimizeExecMaskingPass(*PR);
384   initializeSIPreAllocateWWMRegsPass(*PR);
385   initializeSIFormMemoryClausesPass(*PR);
386   initializeSIPostRABundlerPass(*PR);
387   initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
388   initializeAMDGPUAAWrapperPassPass(*PR);
389   initializeAMDGPUExternalAAWrapperPass(*PR);
390   initializeAMDGPUUseNativeCallsPass(*PR);
391   initializeAMDGPUSimplifyLibCallsPass(*PR);
392   initializeAMDGPUPrintfRuntimeBindingPass(*PR);
393   initializeAMDGPUResourceUsageAnalysisPass(*PR);
394   initializeGCNNSAReassignPass(*PR);
395   initializeGCNPreRAOptimizationsPass(*PR);
396 }
397 
398 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
399   return std::make_unique<AMDGPUTargetObjectFile>();
400 }
401 
402 static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
403   return new ScheduleDAGMILive(C, std::make_unique<R600SchedStrategy>());
404 }
405 
406 static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
407   return new SIScheduleDAGMI(C);
408 }
409 
410 static ScheduleDAGInstrs *
411 createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
412   ScheduleDAGMILive *DAG =
413     new GCNScheduleDAGMILive(C, std::make_unique<GCNMaxOccupancySchedStrategy>(C));
414   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
415   DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
416   DAG->addMutation(createAMDGPUExportClusteringDAGMutation());
417   return DAG;
418 }
419 
420 static ScheduleDAGInstrs *
421 createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
422   auto DAG = new GCNIterativeScheduler(C,
423     GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
424   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
425   return DAG;
426 }
427 
428 static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
429   return new GCNIterativeScheduler(C,
430     GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
431 }
432 
433 static ScheduleDAGInstrs *
434 createIterativeILPMachineScheduler(MachineSchedContext *C) {
435   auto DAG = new GCNIterativeScheduler(C,
436     GCNIterativeScheduler::SCHEDULE_ILP);
437   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
438   DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
439   return DAG;
440 }
441 
442 static MachineSchedRegistry
443 R600SchedRegistry("r600", "Run R600's custom scheduler",
444                    createR600MachineScheduler);
445 
446 static MachineSchedRegistry
447 SISchedRegistry("si", "Run SI's custom scheduler",
448                 createSIMachineScheduler);
449 
450 static MachineSchedRegistry
451 GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
452                              "Run GCN scheduler to maximize occupancy",
453                              createGCNMaxOccupancyMachineScheduler);
454 
455 static MachineSchedRegistry
456 IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
457   "Run GCN scheduler to maximize occupancy (experimental)",
458   createIterativeGCNMaxOccupancyMachineScheduler);
459 
460 static MachineSchedRegistry
461 GCNMinRegSchedRegistry("gcn-minreg",
462   "Run GCN iterative scheduler for minimal register usage (experimental)",
463   createMinRegScheduler);
464 
465 static MachineSchedRegistry
466 GCNILPSchedRegistry("gcn-ilp",
467   "Run GCN iterative scheduler for ILP scheduling (experimental)",
468   createIterativeILPMachineScheduler);
469 
470 static StringRef computeDataLayout(const Triple &TT) {
471   if (TT.getArch() == Triple::r600) {
472     // 32-bit pointers.
473     return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
474            "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1";
475   }
476 
  // 32-bit private, local, and region pointers. 64-bit global, constant and
  // flat pointers. Buffer fat pointers are non-integral.
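  // (p0 is flat, p1 global, p2 region, p3 local, p4 constant, p5 private and
  // p6 32-bit constant; "ni:7" marks buffer fat pointers as non-integral.)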
479   return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
480          "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
481          "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5-G1"
482          "-ni:7";
483 }
484 
485 LLVM_READNONE
486 static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
487   if (!GPU.empty())
488     return GPU;
489 
490   // Need to default to a target with flat support for HSA.
491   if (TT.getArch() == Triple::amdgcn)
492     return TT.getOS() == Triple::AMDHSA ? "generic-hsa" : "generic";
493 
494   return "r600";
495 }
496 
497 static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
498   // The AMDGPU toolchain only supports generating shared objects, so we
499   // must always use PIC.
500   return Reloc::PIC_;
501 }
502 
503 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
504                                          StringRef CPU, StringRef FS,
505                                          TargetOptions Options,
506                                          Optional<Reloc::Model> RM,
507                                          Optional<CodeModel::Model> CM,
508                                          CodeGenOpt::Level OptLevel)
509     : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
510                         FS, Options, getEffectiveRelocModel(RM),
511                         getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
512       TLOF(createTLOF(getTargetTriple())) {
513   initAsmInfo();
514   if (TT.getArch() == Triple::amdgcn) {
515     if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize64"))
516       MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave64));
517     else if (getMCSubtargetInfo()->checkFeatures("+wavefrontsize32"))
518       MRI.reset(llvm::createGCNMCRegisterInfo(AMDGPUDwarfFlavour::Wave32));
519   }
520 }
521 
522 bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
523 bool AMDGPUTargetMachine::EnableFunctionCalls = false;
524 bool AMDGPUTargetMachine::EnableFixedFunctionABI = false;
525 bool AMDGPUTargetMachine::EnableLowerModuleLDS = true;
526 
527 AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
528 
529 StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
530   Attribute GPUAttr = F.getFnAttribute("target-cpu");
531   return GPUAttr.isValid() ? GPUAttr.getValueAsString() : getTargetCPU();
532 }
533 
534 StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
535   Attribute FSAttr = F.getFnAttribute("target-features");
536 
537   return FSAttr.isValid() ? FSAttr.getValueAsString()
538                           : getTargetFeatureString();
539 }
540 
541 /// Predicate for Internalize pass.
542 static bool mustPreserveGV(const GlobalValue &GV) {
543   if (const Function *F = dyn_cast<Function>(&GV))
544     return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
545 
546   GV.removeDeadConstantUsers();
547   return !GV.use_empty();
548 }
549 
550 void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
551   Builder.DivergentTarget = true;
552 
553   bool EnableOpt = getOptLevel() > CodeGenOpt::None;
554   bool Internalize = InternalizeSymbols;
555   bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableFunctionCalls;
556   bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
557   bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
558 
559   if (EnableFunctionCalls) {
560     delete Builder.Inliner;
561     Builder.Inliner = createFunctionInliningPass();
562   }
563 
564   Builder.addExtension(
565     PassManagerBuilder::EP_ModuleOptimizerEarly,
566     [Internalize, EarlyInline, AMDGPUAA, this](const PassManagerBuilder &,
567                                                legacy::PassManagerBase &PM) {
568       if (AMDGPUAA) {
569         PM.add(createAMDGPUAAWrapperPass());
570         PM.add(createAMDGPUExternalAAWrapperPass());
571       }
572       PM.add(createAMDGPUUnifyMetadataPass());
573       PM.add(createAMDGPUPrintfRuntimeBinding());
574       if (Internalize)
575         PM.add(createInternalizePass(mustPreserveGV));
576       PM.add(createAMDGPUPropagateAttributesLatePass(this));
577       if (Internalize)
578         PM.add(createGlobalDCEPass());
579       if (EarlyInline)
580         PM.add(createAMDGPUAlwaysInlinePass(false));
581   });
582 
583   Builder.addExtension(
584     PassManagerBuilder::EP_EarlyAsPossible,
585     [AMDGPUAA, LibCallSimplify, this](const PassManagerBuilder &,
586                                       legacy::PassManagerBase &PM) {
587       if (AMDGPUAA) {
588         PM.add(createAMDGPUAAWrapperPass());
589         PM.add(createAMDGPUExternalAAWrapperPass());
590       }
591       PM.add(llvm::createAMDGPUPropagateAttributesEarlyPass(this));
592       PM.add(llvm::createAMDGPUUseNativeCallsPass());
593       if (LibCallSimplify)
594         PM.add(llvm::createAMDGPUSimplifyLibCallsPass(this));
595   });
596 
597   Builder.addExtension(
598     PassManagerBuilder::EP_CGSCCOptimizerLate,
599     [EnableOpt](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
600       // Add infer address spaces pass to the opt pipeline after inlining
601       // but before SROA to increase SROA opportunities.
602       PM.add(createInferAddressSpacesPass());
603 
604       // This should run after inlining to have any chance of doing anything,
605       // and before other cleanup optimizations.
606       PM.add(createAMDGPULowerKernelAttributesPass());
607 
608       // Promote alloca to vector before SROA and loop unroll. If we manage
609       // to eliminate allocas before unroll we may choose to unroll less.
610       if (EnableOpt)
611         PM.add(createAMDGPUPromoteAllocaToVector());
612   });
613 }
614 
615 void AMDGPUTargetMachine::registerDefaultAliasAnalyses(AAManager &AAM) {
616   AAM.registerFunctionAnalysis<AMDGPUAA>();
617 }
618 
619 void AMDGPUTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
620   PB.registerPipelineParsingCallback(
621       [this](StringRef PassName, ModulePassManager &PM,
622              ArrayRef<PassBuilder::PipelineElement>) {
623         if (PassName == "amdgpu-propagate-attributes-late") {
624           PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
625           return true;
626         }
627         if (PassName == "amdgpu-unify-metadata") {
628           PM.addPass(AMDGPUUnifyMetadataPass());
629           return true;
630         }
631         if (PassName == "amdgpu-printf-runtime-binding") {
632           PM.addPass(AMDGPUPrintfRuntimeBindingPass());
633           return true;
634         }
635         if (PassName == "amdgpu-always-inline") {
636           PM.addPass(AMDGPUAlwaysInlinePass());
637           return true;
638         }
639         if (PassName == "amdgpu-replace-lds-use-with-pointer") {
640           PM.addPass(AMDGPUReplaceLDSUseWithPointerPass());
641           return true;
642         }
643         if (PassName == "amdgpu-lower-module-lds") {
644           PM.addPass(AMDGPULowerModuleLDSPass());
645           return true;
646         }
647         return false;
648       });
649   PB.registerPipelineParsingCallback(
650       [this](StringRef PassName, FunctionPassManager &PM,
651              ArrayRef<PassBuilder::PipelineElement>) {
652         if (PassName == "amdgpu-simplifylib") {
653           PM.addPass(AMDGPUSimplifyLibCallsPass(*this));
654           return true;
655         }
656         if (PassName == "amdgpu-usenative") {
657           PM.addPass(AMDGPUUseNativeCallsPass());
658           return true;
659         }
660         if (PassName == "amdgpu-promote-alloca") {
661           PM.addPass(AMDGPUPromoteAllocaPass(*this));
662           return true;
663         }
664         if (PassName == "amdgpu-promote-alloca-to-vector") {
665           PM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
666           return true;
667         }
668         if (PassName == "amdgpu-lower-kernel-attributes") {
669           PM.addPass(AMDGPULowerKernelAttributesPass());
670           return true;
671         }
672         if (PassName == "amdgpu-propagate-attributes-early") {
673           PM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
674           return true;
675         }
676         return false;
677       });
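
  // The names registered above allow these passes to be invoked directly
  // through the new pass manager, e.g. an illustrative invocation:
  //   opt -passes=amdgpu-promote-alloca,amdgpu-lower-kernel-attributes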
678 
679   PB.registerAnalysisRegistrationCallback([](FunctionAnalysisManager &FAM) {
680     FAM.registerPass([&] { return AMDGPUAA(); });
681   });
682 
683   PB.registerParseAACallback([](StringRef AAName, AAManager &AAM) {
684     if (AAName == "amdgpu-aa") {
685       AAM.registerFunctionAnalysis<AMDGPUAA>();
686       return true;
687     }
688     return false;
689   });
690 
691   PB.registerPipelineStartEPCallback(
692       [this](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) {
693         FunctionPassManager FPM;
694         FPM.addPass(AMDGPUPropagateAttributesEarlyPass(*this));
695         FPM.addPass(AMDGPUUseNativeCallsPass());
696         if (EnableLibCallSimplify &&
697             Level != PassBuilder::OptimizationLevel::O0)
698           FPM.addPass(AMDGPUSimplifyLibCallsPass(*this));
699         PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
700       });
701 
702   PB.registerPipelineEarlySimplificationEPCallback(
703       [this](ModulePassManager &PM, PassBuilder::OptimizationLevel Level) {
704         if (Level == PassBuilder::OptimizationLevel::O0)
705           return;
706 
707         PM.addPass(AMDGPUUnifyMetadataPass());
708         PM.addPass(AMDGPUPrintfRuntimeBindingPass());
709 
710         if (InternalizeSymbols) {
711           PM.addPass(InternalizePass(mustPreserveGV));
712         }
713         PM.addPass(AMDGPUPropagateAttributesLatePass(*this));
714         if (InternalizeSymbols) {
715           PM.addPass(GlobalDCEPass());
716         }
717         if (EarlyInlineAll && !EnableFunctionCalls)
718           PM.addPass(AMDGPUAlwaysInlinePass());
719       });
720 
721   PB.registerCGSCCOptimizerLateEPCallback(
722       [this](CGSCCPassManager &PM, PassBuilder::OptimizationLevel Level) {
723         if (Level == PassBuilder::OptimizationLevel::O0)
724           return;
725 
726         FunctionPassManager FPM;
727 
728         // Add infer address spaces pass to the opt pipeline after inlining
729         // but before SROA to increase SROA opportunities.
730         FPM.addPass(InferAddressSpacesPass());
731 
732         // This should run after inlining to have any chance of doing
733         // anything, and before other cleanup optimizations.
734         FPM.addPass(AMDGPULowerKernelAttributesPass());
735 
736         if (Level != PassBuilder::OptimizationLevel::O0) {
737           // Promote alloca to vector before SROA and loop unroll. If we
738           // manage to eliminate allocas before unroll we may choose to unroll
739           // less.
740           FPM.addPass(AMDGPUPromoteAllocaToVectorPass(*this));
741         }
742 
743         PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
744       });
745 }
746 
747 //===----------------------------------------------------------------------===//
748 // R600 Target Machine (R600 -> Cayman)
749 //===----------------------------------------------------------------------===//
750 
751 R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
752                                      StringRef CPU, StringRef FS,
753                                      TargetOptions Options,
754                                      Optional<Reloc::Model> RM,
755                                      Optional<CodeModel::Model> CM,
756                                      CodeGenOpt::Level OL, bool JIT)
757     : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
758   setRequiresStructuredCFG(true);
759 
760   // Override the default since calls aren't supported for r600.
761   if (EnableFunctionCalls &&
762       EnableAMDGPUFunctionCallsOpt.getNumOccurrences() == 0)
763     EnableFunctionCalls = false;
764 }
765 
766 const R600Subtarget *R600TargetMachine::getSubtargetImpl(
767   const Function &F) const {
768   StringRef GPU = getGPUName(F);
769   StringRef FS = getFeatureString(F);
770 
771   SmallString<128> SubtargetKey(GPU);
772   SubtargetKey.append(FS);
773 
774   auto &I = SubtargetMap[SubtargetKey];
775   if (!I) {
776     // This needs to be done before we create a new subtarget since any
777     // creation will depend on the TM and the code generation flags on the
778     // function that reside in TargetOptions.
779     resetTargetOptions(F);
780     I = std::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
781   }
782 
783   return I.get();
784 }
785 
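// Local, private, and region memory use -1 (all bits set) as the null pointer
// value, since address 0 is a valid and commonly used address in those address
// spaces; all other address spaces use 0.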
786 int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
787   return (AddrSpace == AMDGPUAS::LOCAL_ADDRESS ||
788           AddrSpace == AMDGPUAS::PRIVATE_ADDRESS ||
789           AddrSpace == AMDGPUAS::REGION_ADDRESS)
790              ? -1
791              : 0;
792 }
793 
794 bool AMDGPUTargetMachine::isNoopAddrSpaceCast(unsigned SrcAS,
795                                               unsigned DestAS) const {
796   return AMDGPU::isFlatGlobalAddrSpace(SrcAS) &&
797          AMDGPU::isFlatGlobalAddrSpace(DestAS);
798 }
799 
800 unsigned AMDGPUTargetMachine::getAssumedAddrSpace(const Value *V) const {
801   const auto *LD = dyn_cast<LoadInst>(V);
802   if (!LD)
803     return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
804 
  // It must be a load of a generic pointer.
806   assert(V->getType()->isPointerTy() &&
807          V->getType()->getPointerAddressSpace() == AMDGPUAS::FLAT_ADDRESS);
808 
809   const auto *Ptr = LD->getPointerOperand();
810   if (Ptr->getType()->getPointerAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
811     return AMDGPUAS::UNKNOWN_ADDRESS_SPACE;
  // A generic pointer loaded from constant memory can be assumed to be a
  // global pointer, since constant memory is only populated on the host side.
  // As implied by the offload programming model, only global pointers can be
  // referenced on the host side.
816   return AMDGPUAS::GLOBAL_ADDRESS;
817 }
818 
819 TargetTransformInfo
820 R600TargetMachine::getTargetTransformInfo(const Function &F) {
821   return TargetTransformInfo(R600TTIImpl(this, F));
822 }
823 
824 //===----------------------------------------------------------------------===//
825 // GCN Target Machine (SI+)
826 //===----------------------------------------------------------------------===//
827 
828 GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
829                                    StringRef CPU, StringRef FS,
830                                    TargetOptions Options,
831                                    Optional<Reloc::Model> RM,
832                                    Optional<CodeModel::Model> CM,
833                                    CodeGenOpt::Level OL, bool JIT)
834     : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
835 
836 const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
837   StringRef GPU = getGPUName(F);
838   StringRef FS = getFeatureString(F);
839 
840   SmallString<128> SubtargetKey(GPU);
841   SubtargetKey.append(FS);
842 
843   auto &I = SubtargetMap[SubtargetKey];
844   if (!I) {
845     // This needs to be done before we create a new subtarget since any
846     // creation will depend on the TM and the code generation flags on the
847     // function that reside in TargetOptions.
848     resetTargetOptions(F);
849     I = std::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
850   }
851 
852   I->setScalarizeGlobalBehavior(ScalarizeGlobal);
853 
854   return I.get();
855 }
856 
857 TargetTransformInfo
858 GCNTargetMachine::getTargetTransformInfo(const Function &F) {
859   return TargetTransformInfo(GCNTTIImpl(this, F));
860 }
861 
862 //===----------------------------------------------------------------------===//
863 // AMDGPU Pass Setup
864 //===----------------------------------------------------------------------===//
865 
866 namespace {
867 
868 class AMDGPUPassConfig : public TargetPassConfig {
869 public:
870   AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
871     : TargetPassConfig(TM, PM) {
872     // Exceptions and StackMaps are not supported, so these passes will never do
873     // anything.
874     disablePass(&StackMapLivenessID);
875     disablePass(&FuncletLayoutID);
876     // Garbage collection is not supported.
877     disablePass(&GCLoweringID);
878     disablePass(&ShadowStackGCLoweringID);
879   }
880 
881   AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
882     return getTM<AMDGPUTargetMachine>();
883   }
884 
885   ScheduleDAGInstrs *
886   createMachineScheduler(MachineSchedContext *C) const override {
887     ScheduleDAGMILive *DAG = createGenericSchedLive(C);
888     DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
889     return DAG;
890   }
891 
892   void addEarlyCSEOrGVNPass();
893   void addStraightLineScalarOptimizationPasses();
894   void addIRPasses() override;
895   void addCodeGenPrepare() override;
896   bool addPreISel() override;
897   bool addInstSelector() override;
898   bool addGCPasses() override;
899 
900   std::unique_ptr<CSEConfigBase> getCSEConfig() const override;
901 
  /// Check if a pass is enabled given the \p Opt option. The option always
  /// overrides the default if it is explicitly used. Otherwise the option's
  /// default value is used, but only when compiling at optimization level
  /// \p Level or higher, the minimum level at which the pass is meant to run.
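  /// For example, isPassEnabled(EnableLoadStoreVectorizer) is true at -O2 and
  /// above by default, false at lower optimization levels, and always follows
  /// the flag when -amdgpu-load-store-vectorizer is given explicitly.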
906   bool isPassEnabled(const cl::opt<bool> &Opt,
907                      CodeGenOpt::Level Level = CodeGenOpt::Default) const {
908     if (Opt.getNumOccurrences())
909       return Opt;
910     if (TM->getOptLevel() < Level)
911       return false;
912     return Opt;
913   }
914 };
915 
916 std::unique_ptr<CSEConfigBase> AMDGPUPassConfig::getCSEConfig() const {
917   return getStandardCSEConfigForOpt(TM->getOptLevel());
918 }
919 
920 class R600PassConfig final : public AMDGPUPassConfig {
921 public:
922   R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
923     : AMDGPUPassConfig(TM, PM) {}
924 
925   ScheduleDAGInstrs *createMachineScheduler(
926     MachineSchedContext *C) const override {
927     return createR600MachineScheduler(C);
928   }
929 
930   bool addPreISel() override;
931   bool addInstSelector() override;
932   void addPreRegAlloc() override;
933   void addPreSched2() override;
934   void addPreEmitPass() override;
935 };
936 
937 class GCNPassConfig final : public AMDGPUPassConfig {
938 public:
939   GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
940     : AMDGPUPassConfig(TM, PM) {
941     // It is necessary to know the register usage of the entire call graph.  We
942     // allow calls without EnableAMDGPUFunctionCalls if they are marked
943     // noinline, so this is always required.
944     setRequiresCodeGenSCCOrder(true);
945   }
946 
947   GCNTargetMachine &getGCNTargetMachine() const {
948     return getTM<GCNTargetMachine>();
949   }
950 
951   ScheduleDAGInstrs *
952   createMachineScheduler(MachineSchedContext *C) const override;
953 
954   bool addPreISel() override;
955   void addMachineSSAOptimization() override;
956   bool addILPOpts() override;
957   bool addInstSelector() override;
958   bool addIRTranslator() override;
959   void addPreLegalizeMachineIR() override;
960   bool addLegalizeMachineIR() override;
961   void addPreRegBankSelect() override;
962   bool addRegBankSelect() override;
963   void addPreGlobalInstructionSelect() override;
964   bool addGlobalInstructionSelect() override;
965   void addFastRegAlloc() override;
966   void addOptimizedRegAlloc() override;
967 
968   FunctionPass *createSGPRAllocPass(bool Optimized);
969   FunctionPass *createVGPRAllocPass(bool Optimized);
970   FunctionPass *createRegAllocPass(bool Optimized) override;
971 
972   bool addRegAssignAndRewriteFast() override;
973   bool addRegAssignAndRewriteOptimized() override;
974 
975   void addPreRegAlloc() override;
976   bool addPreRewrite() override;
977   void addPostRegAlloc() override;
978   void addPreSched2() override;
979   void addPreEmitPass() override;
980 };
981 
982 } // end anonymous namespace
983 
984 void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
985   if (getOptLevel() == CodeGenOpt::Aggressive)
986     addPass(createGVNPass());
987   else
988     addPass(createEarlyCSEPass());
989 }
990 
991 void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
992   addPass(createLICMPass());
993   addPass(createSeparateConstOffsetFromGEPPass());
994   addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunities for SLSR. See
996   // the example in reassociate-geps-and-slsr.ll.
997   addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR create common expressions which GVN or
999   // EarlyCSE can reuse.
1000   addEarlyCSEOrGVNPass();
1001   // Run NaryReassociate after EarlyCSE/GVN to be more effective.
1002   addPass(createNaryReassociatePass());
1003   // NaryReassociate on GEPs creates redundant common expressions, so run
1004   // EarlyCSE after it.
1005   addPass(createEarlyCSEPass());
1006 }
1007 
1008 void AMDGPUPassConfig::addIRPasses() {
1009   const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();
1010 
1011   // There is no reason to run these.
1012   disablePass(&StackMapLivenessID);
1013   disablePass(&FuncletLayoutID);
1014   disablePass(&PatchableFunctionID);
1015 
1016   addPass(createAMDGPUPrintfRuntimeBinding());
1017 
1018   // This must occur before inlining, as the inliner will not look through
1019   // bitcast calls.
1020   addPass(createAMDGPUFixFunctionBitcastsPass());
1021 
  // Run the propagate attributes pass in the backend in case opt was not run.
1023   addPass(createAMDGPUPropagateAttributesEarlyPass(&TM));
1024 
1025   addPass(createAMDGPULowerIntrinsicsPass());
1026 
1027   // Function calls are not supported, so make sure we inline everything.
1028   addPass(createAMDGPUAlwaysInlinePass());
1029   addPass(createAlwaysInlinerLegacyPass());
1030   // We need to add the barrier noop pass, otherwise adding the function
  // inlining pass will cause all of the PassConfig's passes to be run
  // one function at a time, which means if we have a module with two
1033   // functions, then we will generate code for the first function
1034   // without ever running any passes on the second.
1035   addPass(createBarrierNoopPass());
1036 
1037   // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
1038   if (TM.getTargetTriple().getArch() == Triple::r600)
1039     addPass(createR600OpenCLImageTypeLoweringPass());
1040 
1041   // Replace OpenCL enqueued block function pointers with global variables.
1042   addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());
1043 
  // Can increase the LDS used by a kernel, so this runs before PromoteAlloca.
1045   if (EnableLowerModuleLDS) {
    // The "amdgpu-replace-lds-use-with-pointer" pass needs to run before the
    // "amdgpu-lower-module-lds" pass, and it is only needed when the
    // "amdgpu-lower-module-lds" pass is enabled.
1049     if (EnableLDSReplaceWithPointer)
1050       addPass(createAMDGPUReplaceLDSUseWithPointerPass());
1051 
1052     addPass(createAMDGPULowerModuleLDSPass());
1053   }
1054 
1055   if (TM.getOptLevel() > CodeGenOpt::None)
1056     addPass(createInferAddressSpacesPass());
1057 
1058   addPass(createAtomicExpandPass());
1059 
1060   if (TM.getOptLevel() > CodeGenOpt::None) {
1061     addPass(createAMDGPUPromoteAlloca());
1062 
1063     if (EnableSROA)
1064       addPass(createSROAPass());
1065     if (isPassEnabled(EnableScalarIRPasses))
1066       addStraightLineScalarOptimizationPasses();
1067 
1068     if (EnableAMDGPUAliasAnalysis) {
1069       addPass(createAMDGPUAAWrapperPass());
1070       addPass(createExternalAAWrapperPass([](Pass &P, Function &,
1071                                              AAResults &AAR) {
1072         if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
1073           AAR.addAAResult(WrapperPass->getResult());
1074         }));
1075     }
1076 
1077     if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
1078       // TODO: May want to move later or split into an early and late one.
1079       addPass(createAMDGPUCodeGenPreparePass());
1080     }
1081   }
1082 
1083   TargetPassConfig::addIRPasses();
1084 
1085   // EarlyCSE is not always strong enough to clean up what LSR produces. For
1086   // example, GVN can combine
1087   //
1088   //   %0 = add %a, %b
1089   //   %1 = add %b, %a
1090   //
1091   // and
1092   //
1093   //   %0 = shl nsw %a, 2
1094   //   %1 = shl %a, 2
1095   //
1096   // but EarlyCSE can do neither of them.
1097   if (isPassEnabled(EnableScalarIRPasses))
1098     addEarlyCSEOrGVNPass();
1099 }
1100 
1101 void AMDGPUPassConfig::addCodeGenPrepare() {
1102   if (TM->getTargetTriple().getArch() == Triple::amdgcn)
1103     addPass(createAMDGPUAnnotateKernelFeaturesPass());
1104 
1105   if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
1106       EnableLowerKernelArguments)
1107     addPass(createAMDGPULowerKernelArgumentsPass());
1108 
1109   TargetPassConfig::addCodeGenPrepare();
1110 
1111   if (isPassEnabled(EnableLoadStoreVectorizer))
1112     addPass(createLoadStoreVectorizerPass());
1113 
  // The LowerSwitch pass may introduce unreachable blocks that can cause
  // unexpected behavior for subsequent passes. Placing it here means these
  // blocks get cleaned up by the UnreachableBlockElim pass inserted next in
  // the pass flow.
1118   addPass(createLowerSwitchPass());
1119 }
1120 
1121 bool AMDGPUPassConfig::addPreISel() {
1122   if (TM->getOptLevel() > CodeGenOpt::None)
1123     addPass(createFlattenCFGPass());
1124   return false;
1125 }
1126 
1127 bool AMDGPUPassConfig::addInstSelector() {
1128   // Defer the verifier until FinalizeISel.
1129   addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()), false);
1130   return false;
1131 }
1132 
1133 bool AMDGPUPassConfig::addGCPasses() {
1134   // Do nothing. GC is not supported.
1135   return false;
1136 }
1137 
1138 //===----------------------------------------------------------------------===//
1139 // R600 Pass Setup
1140 //===----------------------------------------------------------------------===//
1141 
1142 bool R600PassConfig::addPreISel() {
1143   AMDGPUPassConfig::addPreISel();
1144 
1145   if (EnableR600StructurizeCFG)
1146     addPass(createStructurizeCFGPass());
1147   return false;
1148 }
1149 
1150 bool R600PassConfig::addInstSelector() {
1151   addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
1152   return false;
1153 }
1154 
1155 void R600PassConfig::addPreRegAlloc() {
1156   addPass(createR600VectorRegMerger());
1157 }
1158 
1159 void R600PassConfig::addPreSched2() {
1160   addPass(createR600EmitClauseMarkers(), false);
1161   if (EnableR600IfConvert)
1162     addPass(&IfConverterID, false);
1163   addPass(createR600ClauseMergePass(), false);
1164 }
1165 
1166 void R600PassConfig::addPreEmitPass() {
1167   addPass(createAMDGPUCFGStructurizerPass(), false);
1168   addPass(createR600ExpandSpecialInstrsPass(), false);
1169   addPass(&FinalizeMachineBundlesID, false);
1170   addPass(createR600Packetizer(), false);
1171   addPass(createR600ControlFlowFinalizer(), false);
1172 }
1173 
1174 TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
1175   return new R600PassConfig(*this, PM);
1176 }
1177 
1178 //===----------------------------------------------------------------------===//
1179 // GCN Pass Setup
1180 //===----------------------------------------------------------------------===//
1181 
1182 ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
1183   MachineSchedContext *C) const {
1184   const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
1185   if (ST.enableSIScheduler())
1186     return createSIMachineScheduler(C);
1187   return createGCNMaxOccupancyMachineScheduler(C);
1188 }
1189 
1190 bool GCNPassConfig::addPreISel() {
1191   AMDGPUPassConfig::addPreISel();
1192 
1193   if (TM->getOptLevel() > CodeGenOpt::None)
1194     addPass(createAMDGPULateCodeGenPreparePass());
1195 
1196   if (isPassEnabled(EnableAtomicOptimizations, CodeGenOpt::Less)) {
1197     addPass(createAMDGPUAtomicOptimizerPass());
1198   }
1199 
1200   if (TM->getOptLevel() > CodeGenOpt::None)
1201     addPass(createSinkingPass());
1202 
1203   // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
1204   // regions formed by them.
1205   addPass(&AMDGPUUnifyDivergentExitNodesID);
1206   if (!LateCFGStructurize) {
1207     if (EnableStructurizerWorkarounds) {
1208       addPass(createFixIrreduciblePass());
1209       addPass(createUnifyLoopExitsPass());
1210     }
1211     addPass(createStructurizeCFGPass(false)); // true -> SkipUniformRegions
1212   }
1213   addPass(createAMDGPUAnnotateUniformValues());
1214   if (!LateCFGStructurize) {
1215     addPass(createSIAnnotateControlFlowPass());
1216   }
1217   addPass(createLCSSAPass());
1218 
1219   if (TM->getOptLevel() > CodeGenOpt::Less)
1220     addPass(&AMDGPUPerfHintAnalysisID);
1221 
1222   return false;
1223 }
1224 
1225 void GCNPassConfig::addMachineSSAOptimization() {
1226   TargetPassConfig::addMachineSSAOptimization();
1227 
1228   // We want to fold operands after PeepholeOptimizer has run (or as part of
1229   // it), because it will eliminate extra copies making it easier to fold the
1230   // real source operand. We want to eliminate dead instructions after, so that
1231   // we see fewer uses of the copies. We then need to clean up the dead
1232   // instructions leftover after the operands are folded as well.
1233   //
1234   // XXX - Can we get away without running DeadMachineInstructionElim again?
1235   addPass(&SIFoldOperandsID);
1236   if (EnableDPPCombine)
1237     addPass(&GCNDPPCombineID);
1238   addPass(&SILoadStoreOptimizerID);
1239   if (isPassEnabled(EnableSDWAPeephole)) {
1240     addPass(&SIPeepholeSDWAID);
1241     addPass(&EarlyMachineLICMID);
1242     addPass(&MachineCSEID);
1243     addPass(&SIFoldOperandsID);
1244   }
1245   addPass(&DeadMachineInstructionElimID);
1246   addPass(createSIShrinkInstructionsPass());
1247 }
1248 
1249 bool GCNPassConfig::addILPOpts() {
1250   if (EnableEarlyIfConversion)
1251     addPass(&EarlyIfConverterID);
1252 
1253   TargetPassConfig::addILPOpts();
1254   return false;
1255 }
1256 
1257 bool GCNPassConfig::addInstSelector() {
1258   AMDGPUPassConfig::addInstSelector();
1259   addPass(&SIFixSGPRCopiesID);
1260   addPass(createSILowerI1CopiesPass());
1261   return false;
1262 }
1263 
1264 bool GCNPassConfig::addIRTranslator() {
1265   addPass(new IRTranslator(getOptLevel()));
1266   return false;
1267 }
1268 
1269 void GCNPassConfig::addPreLegalizeMachineIR() {
1270   bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1271   addPass(createAMDGPUPreLegalizeCombiner(IsOptNone));
1272   addPass(new Localizer());
1273 }
1274 
1275 bool GCNPassConfig::addLegalizeMachineIR() {
1276   addPass(new Legalizer());
1277   return false;
1278 }
1279 
1280 void GCNPassConfig::addPreRegBankSelect() {
1281   bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1282   addPass(createAMDGPUPostLegalizeCombiner(IsOptNone));
1283 }
1284 
1285 bool GCNPassConfig::addRegBankSelect() {
1286   addPass(new RegBankSelect());
1287   return false;
1288 }
1289 
1290 void GCNPassConfig::addPreGlobalInstructionSelect() {
1291   bool IsOptNone = getOptLevel() == CodeGenOpt::None;
1292   addPass(createAMDGPURegBankCombiner(IsOptNone));
1293 }
1294 
1295 bool GCNPassConfig::addGlobalInstructionSelect() {
1296   addPass(new InstructionSelect(getOptLevel()));
1297   return false;
1298 }
1299 
1300 void GCNPassConfig::addPreRegAlloc() {
1301   if (LateCFGStructurize) {
1302     addPass(createAMDGPUMachineCFGStructurizerPass());
1303   }
1304 }
1305 
1306 void GCNPassConfig::addFastRegAlloc() {
1307   // FIXME: We have to disable the verifier here because of PHIElimination +
1308   // TwoAddressInstructions disabling it.
1309 
1310   // This must be run immediately after phi elimination and before
1311   // TwoAddressInstructions, otherwise the processing of the tied operand of
1312   // SI_ELSE will introduce a copy of the tied operand source after the else.
1313   insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
1314 
1315   insertPass(&TwoAddressInstructionPassID, &SIWholeQuadModeID);
1316   insertPass(&TwoAddressInstructionPassID, &SIPreAllocateWWMRegsID);
1317 
1318   TargetPassConfig::addFastRegAlloc();
1319 }
1320 
1321 void GCNPassConfig::addOptimizedRegAlloc() {
1322   // Allow the scheduler to run before SIWholeQuadMode inserts exec manipulation
1323   // instructions that cause scheduling barriers.
1324   insertPass(&MachineSchedulerID, &SIWholeQuadModeID);
1325   insertPass(&MachineSchedulerID, &SIPreAllocateWWMRegsID);
1326 
1327   if (OptExecMaskPreRA)
1328     insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);
1329 
1330   if (isPassEnabled(EnablePreRAOptimizations))
1331     insertPass(&RenameIndependentSubregsID, &GCNPreRAOptimizationsID);
1332 
1333   // This is not an essential optimization and it has a noticeable impact on
1334   // compilation time, so we only enable it from O2.
1335   if (TM->getOptLevel() > CodeGenOpt::Less)
1336     insertPass(&MachineSchedulerID, &SIFormMemoryClausesID);
1337 
  // FIXME: When an instruction has a killed operand and the instruction is
  // inside a bundle, it seems that only the BUNDLE instruction appears as the
  // kill of the register in LiveVariables. This triggers a verifier failure,
  // so we should fix it and then enable the verifier.
1342   if (OptVGPRLiveRange)
1343     insertPass(&LiveVariablesID, &SIOptimizeVGPRLiveRangeID, false);
1344   // This must be run immediately after phi elimination and before
1345   // TwoAddressInstructions, otherwise the processing of the tied operand of
1346   // SI_ELSE will introduce a copy of the tied operand source after the else.
1347   insertPass(&PHIEliminationID, &SILowerControlFlowID, false);
1348 
1349   if (EnableDCEInRA)
1350     insertPass(&DetectDeadLanesID, &DeadMachineInstructionElimID);
1351 
1352   TargetPassConfig::addOptimizedRegAlloc();
1353 }
1354 
1355 bool GCNPassConfig::addPreRewrite() {
1356   if (EnableRegReassign)
1357     addPass(&GCNNSAReassignID);
1358   return true;
1359 }
1360 
1361 FunctionPass *GCNPassConfig::createSGPRAllocPass(bool Optimized) {
1362   // Initialize the global default.
1363   llvm::call_once(InitializeDefaultSGPRRegisterAllocatorFlag,
1364                   initializeDefaultSGPRRegisterAllocatorOnce);
1365 
1366   RegisterRegAlloc::FunctionPassCtor Ctor = SGPRRegisterRegAlloc::getDefault();
1367   if (Ctor != useDefaultRegisterAllocator)
1368     return Ctor();
1369 
1370   if (Optimized)
1371     return createGreedyRegisterAllocator(onlyAllocateSGPRs);
1372 
1373   return createFastRegisterAllocator(onlyAllocateSGPRs, false);
1374 }
1375 
1376 FunctionPass *GCNPassConfig::createVGPRAllocPass(bool Optimized) {
1377   // Initialize the global default.
1378   llvm::call_once(InitializeDefaultVGPRRegisterAllocatorFlag,
1379                   initializeDefaultVGPRRegisterAllocatorOnce);
1380 
1381   RegisterRegAlloc::FunctionPassCtor Ctor = VGPRRegisterRegAlloc::getDefault();
1382   if (Ctor != useDefaultRegisterAllocator)
1383     return Ctor();
1384 
1385   if (Optimized)
1386     return createGreedyVGPRRegisterAllocator();
1387 
1388   return createFastVGPRRegisterAllocator();
1389 }
1390 
1391 FunctionPass *GCNPassConfig::createRegAllocPass(bool Optimized) {
1392   llvm_unreachable("should not be used");
1393 }
1394 
1395 static const char RegAllocOptNotSupportedMessage[] =
1396   "-regalloc not supported with amdgcn. Use -sgpr-regalloc and -vgpr-regalloc";
1397 
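// SGPRs are allocated and spilled before VGPRs because SGPR spills are lowered
// to writes of VGPR lanes, which introduces new VGPR uses that the VGPR
// allocator then needs to see.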
1398 bool GCNPassConfig::addRegAssignAndRewriteFast() {
1399   if (!usingDefaultRegAlloc())
1400     report_fatal_error(RegAllocOptNotSupportedMessage);
1401 
1402   addPass(createSGPRAllocPass(false));
1403 
1404   // Equivalent of PEI for SGPRs.
1405   addPass(&SILowerSGPRSpillsID);
1406 
1407   addPass(createVGPRAllocPass(false));
1408   return true;
1409 }
1410 
1411 bool GCNPassConfig::addRegAssignAndRewriteOptimized() {
1412   if (!usingDefaultRegAlloc())
1413     report_fatal_error(RegAllocOptNotSupportedMessage);
1414 
1415   addPass(createSGPRAllocPass(true));
1416 
1417   // Commit allocated register changes. This is mostly necessary because too
1418   // many things rely on the use lists of the physical registers, such as the
1419   // verifier. This is only necessary with allocators which use LiveIntervals,
  // since FastRegAlloc does the replacements itself.
1421   addPass(createVirtRegRewriter(false));
1422 
1423   // Equivalent of PEI for SGPRs.
1424   addPass(&SILowerSGPRSpillsID);
1425 
1426   addPass(createVGPRAllocPass(true));
1427 
1428   addPreRewrite();
1429   addPass(&VirtRegRewriterID);
1430 
1431   return true;
1432 }
1433 
1434 void GCNPassConfig::addPostRegAlloc() {
1435   addPass(&SIFixVGPRCopiesID);
1436   if (getOptLevel() > CodeGenOpt::None)
1437     addPass(&SIOptimizeExecMaskingID);
1438   TargetPassConfig::addPostRegAlloc();
1439 }
1440 
1441 void GCNPassConfig::addPreSched2() {
1442   addPass(&SIPostRABundlerID);
1443 }
1444 
1445 void GCNPassConfig::addPreEmitPass() {
1446   addPass(createSIMemoryLegalizerPass());
1447   addPass(createSIInsertWaitcntsPass());
1448 
1449   if (TM->getOptLevel() > CodeGenOpt::None)
1450     addPass(createSIShrinkInstructionsPass());
1451 
1452   addPass(createSIModeRegisterPass());
1453 
1454   if (getOptLevel() > CodeGenOpt::None)
1455     addPass(&SIInsertHardClausesID);
1456 
1457   addPass(&SILateBranchLoweringPassID);
1458   if (getOptLevel() > CodeGenOpt::None)
1459     addPass(&SIPreEmitPeepholeID);
  // The hazard recognizer that runs as part of the post-ra scheduler does not
  // guarantee to be able to handle all hazards correctly. This is because, if
  // there are multiple scheduling regions in a basic block, the regions are
  // scheduled bottom up, so when we begin to schedule a region we don't know
  // what instructions were emitted directly before it.
1465   //
1466   // Here we add a stand-alone hazard recognizer pass which can handle all
1467   // cases.
1468   addPass(&PostRAHazardRecognizerID);
1469   addPass(&BranchRelaxationPassID);
1470 }
1471 
1472 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
1473   return new GCNPassConfig(*this, PM);
1474 }
1475 
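// MIR (de)serialization hooks for SIMachineFunctionInfo.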
1476 yaml::MachineFunctionInfo *GCNTargetMachine::createDefaultFuncInfoYAML() const {
1477   return new yaml::SIMachineFunctionInfo();
1478 }
1479 
1480 yaml::MachineFunctionInfo *
1481 GCNTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const {
1482   const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1483   return new yaml::SIMachineFunctionInfo(
1484       *MFI, *MF.getSubtarget().getRegisterInfo(), MF);
1485 }
1486 
1487 bool GCNTargetMachine::parseMachineFunctionInfo(
1488     const yaml::MachineFunctionInfo &MFI_, PerFunctionMIParsingState &PFS,
1489     SMDiagnostic &Error, SMRange &SourceRange) const {
1490   const yaml::SIMachineFunctionInfo &YamlMFI =
1491       reinterpret_cast<const yaml::SIMachineFunctionInfo &>(MFI_);
1492   MachineFunction &MF = PFS.MF;
1493   SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
1494 
1495   if (MFI->initializeBaseYamlFields(YamlMFI, MF, PFS, Error, SourceRange))
1496     return true;
1497 
1498   if (MFI->Occupancy == 0) {
    // Fix up the subtarget-dependent default value.
1500     const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
1501     MFI->Occupancy = ST.computeOccupancy(MF.getFunction(), MFI->getLDSSize());
1502   }
1503 
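  // Parse a named register reference from the YAML string, recording the
  // string's source range on failure so diagnostics point at the right spot.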
1504   auto parseRegister = [&](const yaml::StringValue &RegName, Register &RegVal) {
1505     Register TempReg;
1506     if (parseNamedRegisterReference(PFS, TempReg, RegName.Value, Error)) {
1507       SourceRange = RegName.SourceRange;
1508       return true;
1509     }
1510     RegVal = TempReg;
1511 
1512     return false;
1513   };
1514 
1515   auto diagnoseRegisterClass = [&](const yaml::StringValue &RegName) {
    // Create a diagnostic for the register string literal.
1517     const MemoryBuffer &Buffer =
1518         *PFS.SM->getMemoryBuffer(PFS.SM->getMainFileID());
1519     Error = SMDiagnostic(*PFS.SM, SMLoc(), Buffer.getBufferIdentifier(), 1,
1520                          RegName.Value.size(), SourceMgr::DK_Error,
1521                          "incorrect register class for field", RegName.Value,
1522                          None, None);
1523     SourceRange = RegName.SourceRange;
1524     return true;
1525   };
1526 
1527   if (parseRegister(YamlMFI.ScratchRSrcReg, MFI->ScratchRSrcReg) ||
1528       parseRegister(YamlMFI.FrameOffsetReg, MFI->FrameOffsetReg) ||
1529       parseRegister(YamlMFI.StackPtrOffsetReg, MFI->StackPtrOffsetReg))
1530     return true;
1531 
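  // Reject parsed registers that do not have the expected class. The default
  // placeholder values (PRIVATE_RSRC_REG, FP_REG, SP_REG) are exempt.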
1532   if (MFI->ScratchRSrcReg != AMDGPU::PRIVATE_RSRC_REG &&
1533       !AMDGPU::SGPR_128RegClass.contains(MFI->ScratchRSrcReg)) {
1534     return diagnoseRegisterClass(YamlMFI.ScratchRSrcReg);
1535   }
1536 
1537   if (MFI->FrameOffsetReg != AMDGPU::FP_REG &&
1538       !AMDGPU::SGPR_32RegClass.contains(MFI->FrameOffsetReg)) {
1539     return diagnoseRegisterClass(YamlMFI.FrameOffsetReg);
1540   }
1541 
1542   if (MFI->StackPtrOffsetReg != AMDGPU::SP_REG &&
1543       !AMDGPU::SGPR_32RegClass.contains(MFI->StackPtrOffsetReg)) {
1544     return diagnoseRegisterClass(YamlMFI.StackPtrOffsetReg);
1545   }
1546 
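  // Parse one optional argument descriptor: either a named register (which
  // must be in the expected class) or a stack offset, apply the optional
  // mask, and account for the user/system SGPRs the argument consumes.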
1547   auto parseAndCheckArgument = [&](const Optional<yaml::SIArgument> &A,
1548                                    const TargetRegisterClass &RC,
1549                                    ArgDescriptor &Arg, unsigned UserSGPRs,
1550                                    unsigned SystemSGPRs) {
1551     // Skip parsing if it's not present.
1552     if (!A)
1553       return false;
1554 
1555     if (A->IsRegister) {
1556       Register Reg;
1557       if (parseNamedRegisterReference(PFS, Reg, A->RegisterName.Value, Error)) {
1558         SourceRange = A->RegisterName.SourceRange;
1559         return true;
1560       }
1561       if (!RC.contains(Reg))
1562         return diagnoseRegisterClass(A->RegisterName);
1563       Arg = ArgDescriptor::createRegister(Reg);
1564     } else
1565       Arg = ArgDescriptor::createStack(A->StackOffset);
1566     // Check and apply the optional mask.
1567     if (A->Mask)
1568       Arg = ArgDescriptor::createArg(Arg, A->Mask.getValue());
1569 
1570     MFI->NumUserSGPRs += UserSGPRs;
1571     MFI->NumSystemSGPRs += SystemSGPRs;
1572     return false;
1573   };
1574 
1575   if (YamlMFI.ArgInfo &&
1576       (parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentBuffer,
1577                              AMDGPU::SGPR_128RegClass,
1578                              MFI->ArgInfo.PrivateSegmentBuffer, 4, 0) ||
1579        parseAndCheckArgument(YamlMFI.ArgInfo->DispatchPtr,
1580                              AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchPtr,
1581                              2, 0) ||
1582        parseAndCheckArgument(YamlMFI.ArgInfo->QueuePtr, AMDGPU::SReg_64RegClass,
1583                              MFI->ArgInfo.QueuePtr, 2, 0) ||
1584        parseAndCheckArgument(YamlMFI.ArgInfo->KernargSegmentPtr,
1585                              AMDGPU::SReg_64RegClass,
1586                              MFI->ArgInfo.KernargSegmentPtr, 2, 0) ||
1587        parseAndCheckArgument(YamlMFI.ArgInfo->DispatchID,
1588                              AMDGPU::SReg_64RegClass, MFI->ArgInfo.DispatchID,
1589                              2, 0) ||
1590        parseAndCheckArgument(YamlMFI.ArgInfo->FlatScratchInit,
1591                              AMDGPU::SReg_64RegClass,
1592                              MFI->ArgInfo.FlatScratchInit, 2, 0) ||
1593        parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentSize,
1594                              AMDGPU::SGPR_32RegClass,
1595                              MFI->ArgInfo.PrivateSegmentSize, 0, 0) ||
1596        parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDX,
1597                              AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDX,
1598                              0, 1) ||
1599        parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDY,
1600                              AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDY,
1601                              0, 1) ||
1602        parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupIDZ,
1603                              AMDGPU::SGPR_32RegClass, MFI->ArgInfo.WorkGroupIDZ,
1604                              0, 1) ||
1605        parseAndCheckArgument(YamlMFI.ArgInfo->WorkGroupInfo,
1606                              AMDGPU::SGPR_32RegClass,
1607                              MFI->ArgInfo.WorkGroupInfo, 0, 1) ||
1608        parseAndCheckArgument(YamlMFI.ArgInfo->PrivateSegmentWaveByteOffset,
1609                              AMDGPU::SGPR_32RegClass,
1610                              MFI->ArgInfo.PrivateSegmentWaveByteOffset, 0, 1) ||
1611        parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitArgPtr,
1612                              AMDGPU::SReg_64RegClass,
1613                              MFI->ArgInfo.ImplicitArgPtr, 0, 0) ||
1614        parseAndCheckArgument(YamlMFI.ArgInfo->ImplicitBufferPtr,
1615                              AMDGPU::SReg_64RegClass,
1616                              MFI->ArgInfo.ImplicitBufferPtr, 2, 0) ||
1617        parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDX,
1618                              AMDGPU::VGPR_32RegClass,
1619                              MFI->ArgInfo.WorkItemIDX, 0, 0) ||
1620        parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDY,
1621                              AMDGPU::VGPR_32RegClass,
1622                              MFI->ArgInfo.WorkItemIDY, 0, 0) ||
1623        parseAndCheckArgument(YamlMFI.ArgInfo->WorkItemIDZ,
1624                              AMDGPU::VGPR_32RegClass,
1625                              MFI->ArgInfo.WorkItemIDZ, 0, 0)))
1626     return true;
1627 
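  // Copy the floating-point mode settings from the YAML description.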
1628   MFI->Mode.IEEE = YamlMFI.Mode.IEEE;
1629   MFI->Mode.DX10Clamp = YamlMFI.Mode.DX10Clamp;
1630   MFI->Mode.FP32InputDenormals = YamlMFI.Mode.FP32InputDenormals;
1631   MFI->Mode.FP32OutputDenormals = YamlMFI.Mode.FP32OutputDenormals;
1632   MFI->Mode.FP64FP16InputDenormals = YamlMFI.Mode.FP64FP16InputDenormals;
1633   MFI->Mode.FP64FP16OutputDenormals = YamlMFI.Mode.FP64FP16OutputDenormals;
1634 
1635   return false;
1636 }
1637