1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9
10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12
13 #include "llvm/IR/PassManager.h"
14 #include "llvm/Pass.h"
15 #include "llvm/Support/AMDGPUAddrSpace.h"
16 #include "llvm/Support/CodeGen.h"
17
18 namespace llvm {
19
20 class AMDGPUTargetMachine;
21 class TargetMachine;
22
23 // GlobalISel passes
24 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
25 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
26 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
27 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
28 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
29 void initializeAMDGPURegBankCombinerPass(PassRegistry &);
30
31 void initializeAMDGPURegBankSelectPass(PassRegistry &);
32
33 // SI Passes
34 FunctionPass *createGCNDPPCombinePass();
35 FunctionPass *createSIAnnotateControlFlowPass();
36 FunctionPass *createSIFoldOperandsPass();
37 FunctionPass *createSIPeepholeSDWAPass();
38 FunctionPass *createSILowerI1CopiesPass();
39 FunctionPass *createAMDGPUGlobalISelDivergenceLoweringPass();
40 FunctionPass *createSIShrinkInstructionsPass();
41 FunctionPass *createSILoadStoreOptimizerPass();
42 FunctionPass *createSIWholeQuadModePass();
43 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
44 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
45 FunctionPass *createSIOptimizeVGPRLiveRangePass();
46 FunctionPass *createSIFixSGPRCopiesPass();
47 FunctionPass *createLowerWWMCopiesPass();
48 FunctionPass *createSIMemoryLegalizerPass();
49 FunctionPass *createSIInsertWaitcntsPass();
50 FunctionPass *createSIPreAllocateWWMRegsPass();
51 FunctionPass *createSIFormMemoryClausesPass();
52
53 FunctionPass *createSIPostRABundlerPass();
54 FunctionPass *createAMDGPUImageIntrinsicOptimizerPass(const TargetMachine *);
55 ModulePass *createAMDGPURemoveIncompatibleFunctionsPass(const TargetMachine *);
56 FunctionPass *createAMDGPUCodeGenPreparePass();
57 FunctionPass *createAMDGPULateCodeGenPreparePass();
58 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
59 FunctionPass *createAMDGPURewriteOutArgumentsPass();
60 ModulePass *
61 createAMDGPULowerModuleLDSLegacyPass(const AMDGPUTargetMachine *TM = nullptr);
62 FunctionPass *createSIModeRegisterPass();
63 FunctionPass *createGCNPreRAOptimizationsPass();
64
65 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
AMDGPUSimplifyLibCallsPassAMDGPUSimplifyLibCallsPass66 AMDGPUSimplifyLibCallsPass() {}
67 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
68 };
69
70 struct AMDGPUImageIntrinsicOptimizerPass
71 : PassInfoMixin<AMDGPUImageIntrinsicOptimizerPass> {
AMDGPUImageIntrinsicOptimizerPassAMDGPUImageIntrinsicOptimizerPass72 AMDGPUImageIntrinsicOptimizerPass(TargetMachine &TM) : TM(TM) {}
73 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
74
75 private:
76 TargetMachine &TM;
77 };
78
79 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
80 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
81 };
82
83 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
84
85 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
86 extern char &AMDGPUMachineCFGStructurizerID;
87
88 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
89
90 Pass *createAMDGPUAnnotateKernelFeaturesPass();
91 Pass *createAMDGPUAttributorLegacyPass();
92 void initializeAMDGPUAttributorLegacyPass(PassRegistry &);
93 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
94 extern char &AMDGPUAnnotateKernelFeaturesID;
95
// Strategy selector for the atomic optimizer: DPP and Iterative each enable
// the optimizer with that strategy, whereas None disables it.
enum class ScanOptions {
  DPP,
  Iterative,
  None
};
99 FunctionPass *createAMDGPUAtomicOptimizerPass(ScanOptions ScanStrategy);
100 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
101 extern char &AMDGPUAtomicOptimizerID;
102
103 ModulePass *createAMDGPUCtorDtorLoweringLegacyPass();
104 void initializeAMDGPUCtorDtorLoweringLegacyPass(PassRegistry &);
105 extern char &AMDGPUCtorDtorLoweringLegacyPassID;
106
107 FunctionPass *createAMDGPULowerKernelArgumentsPass();
108 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
109 extern char &AMDGPULowerKernelArgumentsID;
110
111 FunctionPass *createAMDGPUPromoteKernelArgumentsPass();
112 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &);
113 extern char &AMDGPUPromoteKernelArgumentsID;
114
115 struct AMDGPUPromoteKernelArgumentsPass
116 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> {
117 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
118 };
119
120 ModulePass *createAMDGPULowerKernelAttributesPass();
121 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
122 extern char &AMDGPULowerKernelAttributesID;
123
124 struct AMDGPULowerKernelAttributesPass
125 : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
126 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
127 };
128
129 void initializeAMDGPULowerModuleLDSLegacyPass(PassRegistry &);
130 extern char &AMDGPULowerModuleLDSLegacyPassID;
131
132 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
133 const AMDGPUTargetMachine &TM;
AMDGPULowerModuleLDSPassAMDGPULowerModuleLDSPass134 AMDGPULowerModuleLDSPass(const AMDGPUTargetMachine &TM_) : TM(TM_) {}
135
136 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
137 };
138
139 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
140 extern char &AMDGPURewriteOutArgumentsID;
141
142 void initializeGCNDPPCombinePass(PassRegistry &);
143 extern char &GCNDPPCombineID;
144
145 void initializeSIFoldOperandsPass(PassRegistry &);
146 extern char &SIFoldOperandsID;
147
148 void initializeSIPeepholeSDWAPass(PassRegistry &);
149 extern char &SIPeepholeSDWAID;
150
151 void initializeSIShrinkInstructionsPass(PassRegistry&);
152 extern char &SIShrinkInstructionsID;
153
154 void initializeSIFixSGPRCopiesPass(PassRegistry &);
155 extern char &SIFixSGPRCopiesID;
156
157 void initializeSIFixVGPRCopiesPass(PassRegistry &);
158 extern char &SIFixVGPRCopiesID;
159
160 void initializeSILowerWWMCopiesPass(PassRegistry &);
161 extern char &SILowerWWMCopiesID;
162
163 void initializeSILowerI1CopiesPass(PassRegistry &);
164 extern char &SILowerI1CopiesID;
165
166 void initializeAMDGPUGlobalISelDivergenceLoweringPass(PassRegistry &);
167 extern char &AMDGPUGlobalISelDivergenceLoweringID;
168
169 void initializeAMDGPUMarkLastScratchLoadPass(PassRegistry &);
170 extern char &AMDGPUMarkLastScratchLoadID;
171
172 void initializeSILowerSGPRSpillsPass(PassRegistry &);
173 extern char &SILowerSGPRSpillsID;
174
175 void initializeSILoadStoreOptimizerPass(PassRegistry &);
176 extern char &SILoadStoreOptimizerID;
177
178 void initializeSIWholeQuadModePass(PassRegistry &);
179 extern char &SIWholeQuadModeID;
180
181 void initializeSILowerControlFlowPass(PassRegistry &);
182 extern char &SILowerControlFlowID;
183
184 void initializeSIPreEmitPeepholePass(PassRegistry &);
185 extern char &SIPreEmitPeepholeID;
186
187 void initializeSILateBranchLoweringPass(PassRegistry &);
188 extern char &SILateBranchLoweringPassID;
189
190 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
191 extern char &SIOptimizeExecMaskingID;
192
193 void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
194 extern char &SIPreAllocateWWMRegsID;
195
196 void initializeAMDGPUImageIntrinsicOptimizerPass(PassRegistry &);
197 extern char &AMDGPUImageIntrinsicOptimizerID;
198
199 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
200 extern char &AMDGPUPerfHintAnalysisID;
201
202 void initializeGCNRegPressurePrinterPass(PassRegistry &);
203 extern char &GCNRegPressurePrinterID;
204
205 // Passes common to R600 and SI
206 FunctionPass *createAMDGPUPromoteAlloca();
207 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
208 extern char &AMDGPUPromoteAllocaID;
209
210 FunctionPass *createAMDGPUPromoteAllocaToVector();
211 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
212 extern char &AMDGPUPromoteAllocaToVectorID;
213
214 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
AMDGPUPromoteAllocaPassAMDGPUPromoteAllocaPass215 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
216 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
217
218 private:
219 TargetMachine &TM;
220 };
221
222 struct AMDGPUPromoteAllocaToVectorPass
223 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
AMDGPUPromoteAllocaToVectorPassAMDGPUPromoteAllocaToVectorPass224 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
225 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
226
227 private:
228 TargetMachine &TM;
229 };
230
231 struct AMDGPUAtomicOptimizerPass : PassInfoMixin<AMDGPUAtomicOptimizerPass> {
AMDGPUAtomicOptimizerPassAMDGPUAtomicOptimizerPass232 AMDGPUAtomicOptimizerPass(TargetMachine &TM, ScanOptions ScanImpl)
233 : TM(TM), ScanImpl(ScanImpl) {}
234 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
235
236 private:
237 TargetMachine &TM;
238 ScanOptions ScanImpl;
239 };
240
241 Pass *createAMDGPUStructurizeCFGPass();
242 FunctionPass *createAMDGPUISelDag(TargetMachine &TM, CodeGenOptLevel OptLevel);
243 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
244
245 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
GlobalOptAMDGPUAlwaysInlinePass246 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
247 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
248
249 private:
250 bool GlobalOpt;
251 };
252
253 class AMDGPUCodeGenPreparePass
254 : public PassInfoMixin<AMDGPUCodeGenPreparePass> {
255 private:
256 TargetMachine &TM;
257
258 public:
AMDGPUCodeGenPreparePass(TargetMachine & TM)259 AMDGPUCodeGenPreparePass(TargetMachine &TM) : TM(TM){};
260 PreservedAnalyses run(Function &, FunctionAnalysisManager &);
261 };
262
263 class AMDGPULowerKernelArgumentsPass
264 : public PassInfoMixin<AMDGPULowerKernelArgumentsPass> {
265 private:
266 TargetMachine &TM;
267
268 public:
AMDGPULowerKernelArgumentsPass(TargetMachine & TM)269 AMDGPULowerKernelArgumentsPass(TargetMachine &TM) : TM(TM){};
270 PreservedAnalyses run(Function &, FunctionAnalysisManager &);
271 };
272
273 class AMDGPUAttributorPass : public PassInfoMixin<AMDGPUAttributorPass> {
274 private:
275 TargetMachine &TM;
276
277 public:
AMDGPUAttributorPass(TargetMachine & TM)278 AMDGPUAttributorPass(TargetMachine &TM) : TM(TM){};
279 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
280 };
281
282 FunctionPass *createAMDGPUAnnotateUniformValues();
283
284 ModulePass *createAMDGPUPrintfRuntimeBinding();
285 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
286 extern char &AMDGPUPrintfRuntimeBindingID;
287
288 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &);
289 extern char &AMDGPUResourceUsageAnalysisID;
290
291 struct AMDGPUPrintfRuntimeBindingPass
292 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
293 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
294 };
295
296 ModulePass* createAMDGPUUnifyMetadataPass();
297 void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
298 extern char &AMDGPUUnifyMetadataID;
299
300 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
301 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
302 };
303
304 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
305 extern char &SIOptimizeExecMaskingPreRAID;
306
307 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
308 extern char &SIOptimizeVGPRLiveRangeID;
309
310 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
311 extern char &AMDGPUAnnotateUniformValuesPassID;
312
313 void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
314 extern char &AMDGPUCodeGenPrepareID;
315
316 void initializeAMDGPURemoveIncompatibleFunctionsPass(PassRegistry &);
317 extern char &AMDGPURemoveIncompatibleFunctionsID;
318
319 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
320 extern char &AMDGPULateCodeGenPrepareID;
321
322 FunctionPass *createAMDGPURewriteUndefForPHILegacyPass();
323 void initializeAMDGPURewriteUndefForPHILegacyPass(PassRegistry &);
324 extern char &AMDGPURewriteUndefForPHILegacyPassID;
325
326 class AMDGPURewriteUndefForPHIPass
327 : public PassInfoMixin<AMDGPURewriteUndefForPHIPass> {
328 public:
329 AMDGPURewriteUndefForPHIPass() = default;
330 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
331 };
332
333 void initializeSIAnnotateControlFlowPass(PassRegistry&);
334 extern char &SIAnnotateControlFlowPassID;
335
336 void initializeSIMemoryLegalizerPass(PassRegistry&);
337 extern char &SIMemoryLegalizerID;
338
339 void initializeSIModeRegisterPass(PassRegistry&);
340 extern char &SIModeRegisterID;
341
342 void initializeAMDGPUInsertDelayAluPass(PassRegistry &);
343 extern char &AMDGPUInsertDelayAluID;
344
345 void initializeAMDGPUInsertSingleUseVDSTPass(PassRegistry &);
346 extern char &AMDGPUInsertSingleUseVDSTID;
347
348 void initializeSIInsertHardClausesPass(PassRegistry &);
349 extern char &SIInsertHardClausesID;
350
351 void initializeSIInsertWaitcntsPass(PassRegistry&);
352 extern char &SIInsertWaitcntsID;
353
354 void initializeSIFormMemoryClausesPass(PassRegistry&);
355 extern char &SIFormMemoryClausesID;
356
357 void initializeSIPostRABundlerPass(PassRegistry&);
358 extern char &SIPostRABundlerID;
359
360 void initializeGCNCreateVOPDPass(PassRegistry &);
361 extern char &GCNCreateVOPDID;
362
363 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
364 extern char &AMDGPUUnifyDivergentExitNodesID;
365
366 ImmutablePass *createAMDGPUAAWrapperPass();
367 void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
368 ImmutablePass *createAMDGPUExternalAAWrapperPass();
369 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
370
371 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
372
373 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
374 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
375 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
376
377 void initializeGCNNSAReassignPass(PassRegistry &);
378 extern char &GCNNSAReassignID;
379
380 void initializeGCNPreRALongBranchRegPass(PassRegistry &);
381 extern char &GCNPreRALongBranchRegID;
382
383 void initializeGCNPreRAOptimizationsPass(PassRegistry &);
384 extern char &GCNPreRAOptimizationsID;
385
386 FunctionPass *createAMDGPUSetWavePriorityPass();
387 void initializeAMDGPUSetWavePriorityPass(PassRegistry &);
388
389 void initializeGCNRewritePartialRegUsesPass(llvm::PassRegistry &);
390 extern char &GCNRewritePartialRegUsesID;
391
392 namespace AMDGPU {
// Target index values, numbered consecutively from zero.
// NOTE(review): the DWORD0..DWORD3 entries presumably name the four dwords of
// the scratch resource descriptor -- confirm against the users of this enum.
enum TargetIndex {
  TI_CONSTDATA_START = 0,
  TI_SCRATCH_RSRC_DWORD0,
  TI_SCRATCH_RSRC_DWORD1,
  TI_SCRATCH_RSRC_DWORD2,
  TI_SCRATCH_RSRC_DWORD3
};
400
401 // FIXME: Missing constant_32bit
isFlatGlobalAddrSpace(unsigned AS)402 inline bool isFlatGlobalAddrSpace(unsigned AS) {
403 return AS == AMDGPUAS::GLOBAL_ADDRESS ||
404 AS == AMDGPUAS::FLAT_ADDRESS ||
405 AS == AMDGPUAS::CONSTANT_ADDRESS ||
406 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
407 }
408
isExtendedGlobalAddrSpace(unsigned AS)409 inline bool isExtendedGlobalAddrSpace(unsigned AS) {
410 return AS == AMDGPUAS::GLOBAL_ADDRESS || AS == AMDGPUAS::CONSTANT_ADDRESS ||
411 AS == AMDGPUAS::CONSTANT_ADDRESS_32BIT ||
412 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
413 }
414
addrspacesMayAlias(unsigned AS1,unsigned AS2)415 static inline bool addrspacesMayAlias(unsigned AS1, unsigned AS2) {
416 static_assert(AMDGPUAS::MAX_AMDGPU_ADDRESS <= 9, "Addr space out of range");
417
418 if (AS1 > AMDGPUAS::MAX_AMDGPU_ADDRESS || AS2 > AMDGPUAS::MAX_AMDGPU_ADDRESS)
419 return true;
420
421 // This array is indexed by address space value enum elements 0 ... to 9
422 // clang-format off
423 static const bool ASAliasRules[10][10] = {
424 /* Flat Global Region Group Constant Private Const32 BufFatPtr BufRsrc BufStrdPtr */
425 /* Flat */ {true, true, false, true, true, true, true, true, true, true},
426 /* Global */ {true, true, false, false, true, false, true, true, true, true},
427 /* Region */ {false, false, true, false, false, false, false, false, false, false},
428 /* Group */ {true, false, false, true, false, false, false, false, false, false},
429 /* Constant */ {true, true, false, false, false, false, true, true, true, true},
430 /* Private */ {true, false, false, false, false, true, false, false, false, false},
431 /* Constant 32-bit */ {true, true, false, false, true, false, false, true, true, true},
432 /* Buffer Fat Ptr */ {true, true, false, false, true, false, true, true, true, true},
433 /* Buffer Resource */ {true, true, false, false, true, false, true, true, true, true},
434 /* Buffer Strided Ptr */ {true, true, false, false, true, false, true, true, true, true},
435 };
436 // clang-format on
437
438 return ASAliasRules[AS1][AS2];
439 }
440
441 }
442
443 } // End namespace llvm
444
445 #endif
446