1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13 #include "llvm/IR/PassManager.h" 14 #include "llvm/Support/CodeGen.h" 15 16 namespace llvm { 17 18 class TargetMachine; 19 20 // GlobalISel passes 21 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 22 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 23 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 24 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 25 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 26 void initializeAMDGPURegBankCombinerPass(PassRegistry &); 27 28 // SI Passes 29 FunctionPass *createGCNDPPCombinePass(); 30 FunctionPass *createSIAnnotateControlFlowPass(); 31 FunctionPass *createSIFoldOperandsPass(); 32 FunctionPass *createSIPeepholeSDWAPass(); 33 FunctionPass *createSILowerI1CopiesPass(); 34 FunctionPass *createSIShrinkInstructionsPass(); 35 FunctionPass *createSILoadStoreOptimizerPass(); 36 FunctionPass *createSIWholeQuadModePass(); 37 FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 38 FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 39 FunctionPass *createSIOptimizeVGPRLiveRangePass(); 40 FunctionPass *createSIFixSGPRCopiesPass(); 41 FunctionPass *createSIMemoryLegalizerPass(); 42 FunctionPass *createSIInsertWaitcntsPass(); 43 FunctionPass *createSIPreAllocateWWMRegsPass(); 44 FunctionPass *createSIFormMemoryClausesPass(); 45 46 FunctionPass *createSIPostRABundlerPass(); 47 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *); 48 FunctionPass *createAMDGPUUseNativeCallsPass(); 49 FunctionPass *createAMDGPUCodeGenPreparePass(); 50 FunctionPass *createAMDGPULateCodeGenPreparePass(); 51 FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 52 FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *); 53 ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *); 54 FunctionPass *createAMDGPURewriteOutArgumentsPass(); 55 ModulePass *createAMDGPUReplaceLDSUseWithPointerPass(); 56 ModulePass *createAMDGPULowerModuleLDSPass(); 57 FunctionPass *createSIModeRegisterPass(); 58 FunctionPass *createGCNPreRAOptimizationsPass(); 59 60 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 61 AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {} 62 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 63 64 private: 65 TargetMachine &TM; 66 }; 67 68 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 69 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 70 }; 71 72 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); 73 74 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 75 extern char &AMDGPUMachineCFGStructurizerID; 76 77 void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 78 79 Pass *createAMDGPUAnnotateKernelFeaturesPass(); 80 Pass *createAMDGPUAttributorPass(); 81 void initializeAMDGPUAttributorPass(PassRegistry &); 82 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 83 extern char &AMDGPUAnnotateKernelFeaturesID; 84 85 FunctionPass *createAMDGPUAtomicOptimizerPass(); 86 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 87 extern char &AMDGPUAtomicOptimizerID; 88 89 ModulePass *createAMDGPULowerIntrinsicsPass(); 90 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &); 91 extern char &AMDGPULowerIntrinsicsID; 92 93 ModulePass *createAMDGPUFixFunctionBitcastsPass(); 94 void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &); 95 extern char &AMDGPUFixFunctionBitcastsID; 96 97 ModulePass *createAMDGPUCtorDtorLoweringPass(); 98 void initializeAMDGPUCtorDtorLoweringPass(PassRegistry &); 99 extern char &AMDGPUCtorDtorLoweringID; 100 101 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 102 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 103 extern char &AMDGPULowerKernelArgumentsID; 104 105 FunctionPass *createAMDGPUPromoteKernelArgumentsPass(); 106 void initializeAMDGPUPromoteKernelArgumentsPass(PassRegistry &); 107 extern char &AMDGPUPromoteKernelArgumentsID; 108 109 struct AMDGPUPromoteKernelArgumentsPass 110 : PassInfoMixin<AMDGPUPromoteKernelArgumentsPass> { 111 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 112 }; 113 114 ModulePass *createAMDGPULowerKernelAttributesPass(); 115 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 116 extern char &AMDGPULowerKernelAttributesID; 117 118 struct AMDGPULowerKernelAttributesPass 119 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 120 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 121 }; 122 123 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &); 124 extern char &AMDGPUPropagateAttributesEarlyID; 125 126 struct AMDGPUPropagateAttributesEarlyPass 127 : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> { 128 AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {} 129 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 130 131 private: 132 TargetMachine &TM; 133 }; 134 135 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &); 136 extern char &AMDGPUPropagateAttributesLateID; 137 138 struct AMDGPUPropagateAttributesLatePass 139 : PassInfoMixin<AMDGPUPropagateAttributesLatePass> { 140 AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {} 141 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 142 143 private: 144 TargetMachine &TM; 145 }; 146 147 void initializeAMDGPUReplaceLDSUseWithPointerPass(PassRegistry &); 148 extern char &AMDGPUReplaceLDSUseWithPointerID; 149 150 struct AMDGPUReplaceLDSUseWithPointerPass 151 : PassInfoMixin<AMDGPUReplaceLDSUseWithPointerPass> { 152 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 153 }; 154 155 void initializeAMDGPULowerModuleLDSPass(PassRegistry &); 156 extern char &AMDGPULowerModuleLDSID; 157 158 struct AMDGPULowerModuleLDSPass : PassInfoMixin<AMDGPULowerModuleLDSPass> { 159 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 160 }; 161 162 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 163 extern char &AMDGPURewriteOutArgumentsID; 164 165 void initializeGCNDPPCombinePass(PassRegistry &); 166 extern char &GCNDPPCombineID; 167 168 void initializeSIFoldOperandsPass(PassRegistry &); 169 extern char &SIFoldOperandsID; 170 171 void initializeSIPeepholeSDWAPass(PassRegistry &); 172 extern char &SIPeepholeSDWAID; 173 174 void initializeSIShrinkInstructionsPass(PassRegistry&); 175 extern char &SIShrinkInstructionsID; 176 177 void initializeSIFixSGPRCopiesPass(PassRegistry &); 178 extern char &SIFixSGPRCopiesID; 179 180 void initializeSIFixVGPRCopiesPass(PassRegistry &); 181 extern char &SIFixVGPRCopiesID; 182 183 void initializeSILowerI1CopiesPass(PassRegistry &); 184 extern char &SILowerI1CopiesID; 185 186 void initializeSILowerSGPRSpillsPass(PassRegistry &); 187 extern char &SILowerSGPRSpillsID; 188 189 void initializeSILoadStoreOptimizerPass(PassRegistry &); 190 extern char &SILoadStoreOptimizerID; 191 192 void initializeSIWholeQuadModePass(PassRegistry &); 193 extern char &SIWholeQuadModeID; 194 195 void initializeSILowerControlFlowPass(PassRegistry &); 196 extern char &SILowerControlFlowID; 197 198 void initializeSIPreEmitPeepholePass(PassRegistry &); 199 extern char &SIPreEmitPeepholeID; 200 201 void initializeSILateBranchLoweringPass(PassRegistry &); 202 extern char &SILateBranchLoweringPassID; 203 204 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 205 extern char &SIOptimizeExecMaskingID; 206 207 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 208 extern char &SIPreAllocateWWMRegsID; 209 210 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &); 211 extern char &AMDGPUSimplifyLibCallsID; 212 213 void initializeAMDGPUUseNativeCallsPass(PassRegistry &); 214 extern char &AMDGPUUseNativeCallsID; 215 216 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 217 extern char &AMDGPUPerfHintAnalysisID; 218 219 // Passes common to R600 and SI 220 FunctionPass *createAMDGPUPromoteAlloca(); 221 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 222 extern char &AMDGPUPromoteAllocaID; 223 224 FunctionPass *createAMDGPUPromoteAllocaToVector(); 225 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 226 extern char &AMDGPUPromoteAllocaToVectorID; 227 228 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 229 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 230 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 231 232 private: 233 TargetMachine &TM; 234 }; 235 236 struct AMDGPUPromoteAllocaToVectorPass 237 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 238 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 239 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 240 241 private: 242 TargetMachine &TM; 243 }; 244 245 Pass *createAMDGPUStructurizeCFGPass(); 246 FunctionPass *createAMDGPUISelDag( 247 TargetMachine *TM = nullptr, 248 CodeGenOpt::Level OptLevel = CodeGenOpt::Default); 249 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 250 251 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 252 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 253 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 254 255 private: 256 bool GlobalOpt; 257 }; 258 259 FunctionPass *createAMDGPUAnnotateUniformValues(); 260 261 ModulePass *createAMDGPUPrintfRuntimeBinding(); 262 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 263 extern char &AMDGPUPrintfRuntimeBindingID; 264 265 void initializeAMDGPUResourceUsageAnalysisPass(PassRegistry &); 266 extern char &AMDGPUResourceUsageAnalysisID; 267 268 struct AMDGPUPrintfRuntimeBindingPass 269 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 270 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 271 }; 272 273 ModulePass* createAMDGPUUnifyMetadataPass(); 274 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 275 extern char &AMDGPUUnifyMetadataID; 276 277 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 278 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 279 }; 280 281 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 282 extern char &SIOptimizeExecMaskingPreRAID; 283 284 void initializeSIOptimizeVGPRLiveRangePass(PassRegistry &); 285 extern char &SIOptimizeVGPRLiveRangeID; 286 287 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 288 extern char &AMDGPUAnnotateUniformValuesPassID; 289 290 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 291 extern char &AMDGPUCodeGenPrepareID; 292 293 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 294 extern char &AMDGPULateCodeGenPrepareID; 295 296 void initializeSIAnnotateControlFlowPass(PassRegistry&); 297 extern char &SIAnnotateControlFlowPassID; 298 299 void initializeSIMemoryLegalizerPass(PassRegistry&); 300 extern char &SIMemoryLegalizerID; 301 302 void initializeSIModeRegisterPass(PassRegistry&); 303 extern char &SIModeRegisterID; 304 305 void initializeSIInsertHardClausesPass(PassRegistry &); 306 extern char &SIInsertHardClausesID; 307 308 void initializeSIInsertWaitcntsPass(PassRegistry&); 309 extern char &SIInsertWaitcntsID; 310 311 void initializeSIFormMemoryClausesPass(PassRegistry&); 312 extern char &SIFormMemoryClausesID; 313 314 void initializeSIPostRABundlerPass(PassRegistry&); 315 extern char &SIPostRABundlerID; 316 317 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 318 extern char &AMDGPUUnifyDivergentExitNodesID; 319 320 ImmutablePass *createAMDGPUAAWrapperPass(); 321 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 322 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 323 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 324 325 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 326 327 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 328 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 329 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 330 331 void initializeGCNNSAReassignPass(PassRegistry &); 332 extern char &GCNNSAReassignID; 333 334 void initializeGCNPreRAOptimizationsPass(PassRegistry &); 335 extern char &GCNPreRAOptimizationsID; 336 337 namespace AMDGPU { 338 enum TargetIndex { 339 TI_CONSTDATA_START, 340 TI_SCRATCH_RSRC_DWORD0, 341 TI_SCRATCH_RSRC_DWORD1, 342 TI_SCRATCH_RSRC_DWORD2, 343 TI_SCRATCH_RSRC_DWORD3 344 }; 345 } 346 347 /// OpenCL uses address spaces to differentiate between 348 /// various memory regions on the hardware. On the CPU 349 /// all of the address spaces point to the same memory, 350 /// however on the GPU, each address space points to 351 /// a separate piece of memory that is unique from other 352 /// memory locations. 353 namespace AMDGPUAS { 354 enum : unsigned { 355 // The maximum value for flat, generic, local, private, constant and region. 356 MAX_AMDGPU_ADDRESS = 7, 357 358 FLAT_ADDRESS = 0, ///< Address space for flat memory. 359 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). 360 REGION_ADDRESS = 2, ///< Address space for region memory. (GDS) 361 362 CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2). 363 LOCAL_ADDRESS = 3, ///< Address space for local memory. 364 PRIVATE_ADDRESS = 5, ///< Address space for private memory. 365 366 CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory. 367 368 BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers. 369 370 /// Address space for direct addressable parameter memory (CONST0). 371 PARAM_D_ADDRESS = 6, 372 /// Address space for indirect addressable parameter memory (VTX1). 373 PARAM_I_ADDRESS = 7, 374 375 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on 376 // this order to be able to dynamically index a constant buffer, for 377 // example: 378 // 379 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx 380 381 CONSTANT_BUFFER_0 = 8, 382 CONSTANT_BUFFER_1 = 9, 383 CONSTANT_BUFFER_2 = 10, 384 CONSTANT_BUFFER_3 = 11, 385 CONSTANT_BUFFER_4 = 12, 386 CONSTANT_BUFFER_5 = 13, 387 CONSTANT_BUFFER_6 = 14, 388 CONSTANT_BUFFER_7 = 15, 389 CONSTANT_BUFFER_8 = 16, 390 CONSTANT_BUFFER_9 = 17, 391 CONSTANT_BUFFER_10 = 18, 392 CONSTANT_BUFFER_11 = 19, 393 CONSTANT_BUFFER_12 = 20, 394 CONSTANT_BUFFER_13 = 21, 395 CONSTANT_BUFFER_14 = 22, 396 CONSTANT_BUFFER_15 = 23, 397 398 // Some places use this if the address space can't be determined. 399 UNKNOWN_ADDRESS_SPACE = ~0u, 400 }; 401 } 402 403 namespace AMDGPU { 404 405 // FIXME: Missing constant_32bit 406 inline bool isFlatGlobalAddrSpace(unsigned AS) { 407 return AS == AMDGPUAS::GLOBAL_ADDRESS || 408 AS == AMDGPUAS::FLAT_ADDRESS || 409 AS == AMDGPUAS::CONSTANT_ADDRESS || 410 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 411 } 412 } 413 414 } // End namespace llvm 415 416 #endif 417