1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 /// \file
8 //===----------------------------------------------------------------------===//
9
10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H
12
13 #include "llvm/IR/PassManager.h"
14 #include "llvm/Support/CodeGen.h"
15
16 namespace llvm {
17
// Forward declarations. Within this header these types appear only behind
// pointers or references, so their full definitions are not required here.
class FunctionPass;
class GCNTargetMachine;
class ImmutablePass;
class MachineFunctionPass;
class Module;
class ModulePass;
class Pass;
class PassRegistry;
class Target;
class TargetMachine;
class TargetOptions;
29
30 // GlobalISel passes
31 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &);
32 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone);
33 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &);
34 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone);
35 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone);
36 void initializeAMDGPURegBankCombinerPass(PassRegistry &);
37
38 // R600 Passes
39 FunctionPass *createR600VectorRegMerger();
40 FunctionPass *createR600ExpandSpecialInstrsPass();
41 FunctionPass *createR600EmitClauseMarkers();
42 FunctionPass *createR600ClauseMergePass();
43 FunctionPass *createR600Packetizer();
44 FunctionPass *createR600ControlFlowFinalizer();
45 FunctionPass *createAMDGPUCFGStructurizerPass();
46 FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel);
47
48 // SI Passes
49 FunctionPass *createGCNDPPCombinePass();
50 FunctionPass *createSIAnnotateControlFlowPass();
51 FunctionPass *createSIFoldOperandsPass();
52 FunctionPass *createSIPeepholeSDWAPass();
53 FunctionPass *createSILowerI1CopiesPass();
54 FunctionPass *createSIShrinkInstructionsPass();
55 FunctionPass *createSILoadStoreOptimizerPass();
56 FunctionPass *createSIWholeQuadModePass();
57 FunctionPass *createSIFixControlFlowLiveIntervalsPass();
58 FunctionPass *createSIOptimizeExecMaskingPreRAPass();
59 FunctionPass *createSIOptimizeVGPRLiveRangePass();
60 FunctionPass *createSIFixSGPRCopiesPass();
61 FunctionPass *createSIMemoryLegalizerPass();
62 FunctionPass *createSIInsertWaitcntsPass();
63 FunctionPass *createSIPreAllocateWWMRegsPass();
64 FunctionPass *createSIFormMemoryClausesPass();
65
66 FunctionPass *createSIPostRABundlerPass();
67 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *);
68 FunctionPass *createAMDGPUUseNativeCallsPass();
69 FunctionPass *createAMDGPUCodeGenPreparePass();
70 FunctionPass *createAMDGPULateCodeGenPreparePass();
71 FunctionPass *createAMDGPUMachineCFGStructurizerPass();
72 FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *);
73 ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *);
74 FunctionPass *createAMDGPURewriteOutArgumentsPass();
75 ModulePass *createAMDGPUReplaceLDSUseWithPointerPass();
76 ModulePass *createAMDGPULowerModuleLDSPass();
77 FunctionPass *createSIModeRegisterPass();
78 FunctionPass *createGCNPreRAOptimizationsPass();
79
80 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> {
AMDGPUSimplifyLibCallsPassAMDGPUSimplifyLibCallsPass81 AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {}
82 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
83
84 private:
85 TargetMachine &TM;
86 };
87
88 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> {
89 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
90 };
91
92 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&);
93
94 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&);
95 extern char &AMDGPUMachineCFGStructurizerID;
96
97 void initializeAMDGPUAlwaysInlinePass(PassRegistry&);
98
99 Pass *createAMDGPUAnnotateKernelFeaturesPass();
100 Pass *createAMDGPUAttributorPass();
101 void initializeAMDGPUAttributorPass(PassRegistry &);
102 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &);
103 extern char &AMDGPUAnnotateKernelFeaturesID;
104
105 FunctionPass *createAMDGPUAtomicOptimizerPass();
106 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &);
107 extern char &AMDGPUAtomicOptimizerID;
108
109 ModulePass *createAMDGPULowerIntrinsicsPass();
110 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &);
111 extern char &AMDGPULowerIntrinsicsID;
112
113 ModulePass *createAMDGPUFixFunctionBitcastsPass();
114 void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &);
115 extern char &AMDGPUFixFunctionBitcastsID;
116
117 FunctionPass *createAMDGPULowerKernelArgumentsPass();
118 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &);
119 extern char &AMDGPULowerKernelArgumentsID;
120
121 ModulePass *createAMDGPULowerKernelAttributesPass();
122 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &);
123 extern char &AMDGPULowerKernelAttributesID;
124
125 struct AMDGPULowerKernelAttributesPass
126 : PassInfoMixin<AMDGPULowerKernelAttributesPass> {
127 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
128 };
129
130 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &);
131 extern char &AMDGPUPropagateAttributesEarlyID;
132
133 struct AMDGPUPropagateAttributesEarlyPass
134 : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> {
AMDGPUPropagateAttributesEarlyPassAMDGPUPropagateAttributesEarlyPass135 AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {}
136 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
137
138 private:
139 TargetMachine &TM;
140 };
141
142 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &);
143 extern char &AMDGPUPropagateAttributesLateID;
144
145 struct AMDGPUPropagateAttributesLatePass
146 : PassInfoMixin<AMDGPUPropagateAttributesLatePass> {
AMDGPUPropagateAttributesLatePassAMDGPUPropagateAttributesLatePass147 AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {}
148 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
149
150 private:
151 TargetMachine &TM;
152 };
153
154 void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &);
155 extern char &AMDGPUReplaceLDSUseWithPointerID;
156
157 struct AMDGPUReplaceLDSUseWithPointerPass
158 : PassInfoMixin<AMDGPUReplaceLDSUseWithPointerPass> {
159 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
160 };
161
162 void initializeAMDGPULowerModuleLDSPass(PassRegistry &);
163 extern char &AMDGPULowerModuleLDSID;
164
165 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> {
166 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
167 };
168
169 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &);
170 extern char &AMDGPURewriteOutArgumentsID;
171
172 void initializeGCNDPPCombinePass(PassRegistry &);
173 extern char &GCNDPPCombineID;
174
175 void initializeR600ClauseMergePassPass(PassRegistry &);
176 extern char &R600ClauseMergePassID;
177
178 void initializeR600ControlFlowFinalizerPass(PassRegistry &);
179 extern char &R600ControlFlowFinalizerID;
180
181 void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &);
182 extern char &R600ExpandSpecialInstrsPassID;
183
184 void initializeR600VectorRegMergerPass(PassRegistry &);
185 extern char &R600VectorRegMergerID;
186
187 void initializeR600PacketizerPass(PassRegistry &);
188 extern char &R600PacketizerID;
189
190 void initializeSIFoldOperandsPass(PassRegistry &);
191 extern char &SIFoldOperandsID;
192
193 void initializeSIPeepholeSDWAPass(PassRegistry &);
194 extern char &SIPeepholeSDWAID;
195
196 void initializeSIShrinkInstructionsPass(PassRegistry&);
197 extern char &SIShrinkInstructionsID;
198
199 void initializeSIFixSGPRCopiesPass(PassRegistry &);
200 extern char &SIFixSGPRCopiesID;
201
202 void initializeSIFixVGPRCopiesPass(PassRegistry &);
203 extern char &SIFixVGPRCopiesID;
204
205 void initializeSILowerI1CopiesPass(PassRegistry &);
206 extern char &SILowerI1CopiesID;
207
208 void initializeSILowerSGPRSpillsPass(PassRegistry &);
209 extern char &SILowerSGPRSpillsID;
210
211 void initializeSILoadStoreOptimizerPass(PassRegistry &);
212 extern char &SILoadStoreOptimizerID;
213
214 void initializeSIWholeQuadModePass(PassRegistry &);
215 extern char &SIWholeQuadModeID;
216
217 void initializeSILowerControlFlowPass(PassRegistry &);
218 extern char &SILowerControlFlowID;
219
220 void initializeSIPreEmitPeepholePass(PassRegistry &);
221 extern char &SIPreEmitPeepholeID;
222
223 void initializeSILateBranchLoweringPass(PassRegistry &);
224 extern char &SILateBranchLoweringPassID;
225
226 void initializeSIOptimizeExecMaskingPass(PassRegistry &);
227 extern char &SIOptimizeExecMaskingID;
228
229 void initializeSIPreAllocateWWMRegsPass(PassRegistry &);
230 extern char &SIPreAllocateWWMRegsID;
231
232 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &);
233 extern char &AMDGPUSimplifyLibCallsID;
234
235 void initializeAMDGPUUseNativeCallsPass(PassRegistry &);
236 extern char &AMDGPUUseNativeCallsID;
237
238 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &);
239 extern char &AMDGPUPerfHintAnalysisID;
240
241 // Passes common to R600 and SI
242 FunctionPass *createAMDGPUPromoteAlloca();
243 void initializeAMDGPUPromoteAllocaPass(PassRegistry&);
244 extern char &AMDGPUPromoteAllocaID;
245
246 FunctionPass *createAMDGPUPromoteAllocaToVector();
247 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&);
248 extern char &AMDGPUPromoteAllocaToVectorID;
249
250 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> {
AMDGPUPromoteAllocaPassAMDGPUPromoteAllocaPass251 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {}
252 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
253
254 private:
255 TargetMachine &TM;
256 };
257
258 struct AMDGPUPromoteAllocaToVectorPass
259 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> {
AMDGPUPromoteAllocaToVectorPassAMDGPUPromoteAllocaToVectorPass260 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {}
261 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
262
263 private:
264 TargetMachine &TM;
265 };
266
267 Pass *createAMDGPUStructurizeCFGPass();
268 FunctionPass *createAMDGPUISelDag(
269 TargetMachine *TM = nullptr,
270 CodeGenOpt::Level OptLevel = CodeGenOpt::Default);
271 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true);
272
273 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> {
GlobalOptAMDGPUAlwaysInlinePass274 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {}
275 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
276
277 private:
278 bool GlobalOpt;
279 };
280
281 ModulePass *createR600OpenCLImageTypeLoweringPass();
282 FunctionPass *createAMDGPUAnnotateUniformValues();
283
284 ModulePass *createAMDGPUPrintfRuntimeBinding();
285 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&);
286 extern char &AMDGPUPrintfRuntimeBindingID;
287
288 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &);
289 extern char &AMDGPUResourceUsageAnalysisID;
290
291 struct AMDGPUPrintfRuntimeBindingPass
292 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> {
293 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
294 };
295
296 ModulePass* createAMDGPUUnifyMetadataPass();
297 void initializeAMDGPUUnifyMetadataPass(PassRegistry&);
298 extern char &AMDGPUUnifyMetadataID;
299
300 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> {
301 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
302 };
303
304 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&);
305 extern char &SIOptimizeExecMaskingPreRAID;
306
307 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &);
308 extern char &SIOptimizeVGPRLiveRangeID;
309
310 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&);
311 extern char &AMDGPUAnnotateUniformValuesPassID;
312
313 void initializeAMDGPUCodeGenPreparePass(PassRegistry&);
314 extern char &AMDGPUCodeGenPrepareID;
315
316 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &);
317 extern char &AMDGPULateCodeGenPrepareID;
318
319 void initializeSIAnnotateControlFlowPass(PassRegistry&);
320 extern char &SIAnnotateControlFlowPassID;
321
322 void initializeSIMemoryLegalizerPass(PassRegistry&);
323 extern char &SIMemoryLegalizerID;
324
325 void initializeSIModeRegisterPass(PassRegistry&);
326 extern char &SIModeRegisterID;
327
328 void initializeSIInsertHardClausesPass(PassRegistry &);
329 extern char &SIInsertHardClausesID;
330
331 void initializeSIInsertWaitcntsPass(PassRegistry&);
332 extern char &SIInsertWaitcntsID;
333
334 void initializeSIFormMemoryClausesPass(PassRegistry&);
335 extern char &SIFormMemoryClausesID;
336
337 void initializeSIPostRABundlerPass(PassRegistry&);
338 extern char &SIPostRABundlerID;
339
340 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&);
341 extern char &AMDGPUUnifyDivergentExitNodesID;
342
343 ImmutablePass *createAMDGPUAAWrapperPass();
344 void initializeAMDGPUAAWrapperPassPass(PassRegistry&);
345 ImmutablePass *createAMDGPUExternalAAWrapperPass();
346 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&);
347
348 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &);
349
350 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass();
351 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &);
352 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID;
353
354 void initializeGCNNSAReassignPass(PassRegistry &);
355 extern char &GCNNSAReassignID;
356
357 void initializeGCNPreRAOptimizationsPass(PassRegistry &);
358 extern char &GCNPreRAOptimizationsID;
359
namespace AMDGPU {
/// Target index values. The enumerators are numbered consecutively starting
/// at zero; the values are spelled out so the numbering is visible at a glance.
enum TargetIndex {
  TI_CONSTDATA_START = 0,
  TI_SCRATCH_RSRC_DWORD0 = 1,
  TI_SCRATCH_RSRC_DWORD1 = 2,
  TI_SCRATCH_RSRC_DWORD2 = 3,
  TI_SCRATCH_RSRC_DWORD3 = 4
};
} // namespace AMDGPU
369
/// OpenCL uses address spaces to differentiate between
/// various memory regions on the hardware. On the CPU
/// all of the address spaces point to the same memory,
/// however on the GPU, each address space points to
/// a separate piece of memory that is unique from other
/// memory locations.
namespace AMDGPUAS {
enum : unsigned {
  // The maximum value for flat, generic, local, private, constant and region.
  MAX_AMDGPU_ADDRESS = 7,

  // Enumerators are listed in ascending numeric order.
  FLAT_ADDRESS = 0,     ///< Address space for flat memory.
  GLOBAL_ADDRESS = 1,   ///< Address space for global memory (RAT0, VTX0).
  REGION_ADDRESS = 2,   ///< Address space for region memory. (GDS)
  LOCAL_ADDRESS = 3,    ///< Address space for local memory.
  CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2).
  PRIVATE_ADDRESS = 5,  ///< Address space for private memory.

  CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory.

  BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers.

  // The two PARAM_* enumerators reuse values that are already assigned above:
  // 6 is shared with CONSTANT_ADDRESS_32BIT and 7 with BUFFER_FAT_POINTER.

  /// Address space for direct addressable parameter memory (CONST0).
  PARAM_D_ADDRESS = 6,
  /// Address space for indirect addressable parameter memory (VTX1).
  PARAM_I_ADDRESS = 7,

  // Do not re-order the CONSTANT_BUFFER_* enums.  Several places depend on
  // this order to be able to dynamically index a constant buffer, for
  // example:
  //
  // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx

  CONSTANT_BUFFER_0 = 8,
  CONSTANT_BUFFER_1 = 9,
  CONSTANT_BUFFER_2 = 10,
  CONSTANT_BUFFER_3 = 11,
  CONSTANT_BUFFER_4 = 12,
  CONSTANT_BUFFER_5 = 13,
  CONSTANT_BUFFER_6 = 14,
  CONSTANT_BUFFER_7 = 15,
  CONSTANT_BUFFER_8 = 16,
  CONSTANT_BUFFER_9 = 17,
  CONSTANT_BUFFER_10 = 18,
  CONSTANT_BUFFER_11 = 19,
  CONSTANT_BUFFER_12 = 20,
  CONSTANT_BUFFER_13 = 21,
  CONSTANT_BUFFER_14 = 22,
  CONSTANT_BUFFER_15 = 23,

  // Some places use this if the address space can't be determined.
  UNKNOWN_ADDRESS_SPACE = ~0u,
};
} // namespace AMDGPUAS
425
426 namespace AMDGPU {
427
428 // FIXME: Missing constant_32bit
isFlatGlobalAddrSpace(unsigned AS)429 inline bool isFlatGlobalAddrSpace(unsigned AS) {
430 return AS == AMDGPUAS::GLOBAL_ADDRESS ||
431 AS == AMDGPUAS::FLAT_ADDRESS ||
432 AS == AMDGPUAS::CONSTANT_ADDRESS ||
433 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS;
434 }
435 }
436
437 } // End namespace llvm
438
439 #endif
440