1 //===-- AMDGPU.h - MachineFunction passes hw codegen --------------*- C++ -*-=// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 /// \file 8 //===----------------------------------------------------------------------===// 9 10 #ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 11 #define LLVM_LIB_TARGET_AMDGPU_AMDGPU_H 12 13 #include "llvm/IR/PassManager.h" 14 #include "llvm/Support/CodeGen.h" 15 16 namespace llvm { 17 18 class FunctionPass; 19 class GCNTargetMachine; 20 class ImmutablePass; 21 class ModulePass; 22 class Pass; 23 class Target; 24 class TargetMachine; 25 class TargetOptions; 26 class PassRegistry; 27 class Module; 28 29 // GlobalISel passes 30 void initializeAMDGPUPreLegalizerCombinerPass(PassRegistry &); 31 FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone); 32 void initializeAMDGPUPostLegalizerCombinerPass(PassRegistry &); 33 FunctionPass *createAMDGPUPostLegalizeCombiner(bool IsOptNone); 34 FunctionPass *createAMDGPURegBankCombiner(bool IsOptNone); 35 void initializeAMDGPURegBankCombinerPass(PassRegistry &); 36 37 // R600 Passes 38 FunctionPass *createR600VectorRegMerger(); 39 FunctionPass *createR600ExpandSpecialInstrsPass(); 40 FunctionPass *createR600EmitClauseMarkers(); 41 FunctionPass *createR600ClauseMergePass(); 42 FunctionPass *createR600Packetizer(); 43 FunctionPass *createR600ControlFlowFinalizer(); 44 FunctionPass *createAMDGPUCFGStructurizerPass(); 45 FunctionPass *createR600ISelDag(TargetMachine *TM, CodeGenOpt::Level OptLevel); 46 47 // SI Passes 48 FunctionPass *createGCNDPPCombinePass(); 49 FunctionPass *createSIAnnotateControlFlowPass(); 50 FunctionPass *createSIFoldOperandsPass(); 51 FunctionPass *createSIPeepholeSDWAPass(); 52 FunctionPass *createSILowerI1CopiesPass(); 53 FunctionPass *createSIAddIMGInitPass(); 54 FunctionPass *createSIShrinkInstructionsPass(); 55 FunctionPass *createSILoadStoreOptimizerPass(); 56 FunctionPass *createSIWholeQuadModePass(); 57 FunctionPass *createSIFixControlFlowLiveIntervalsPass(); 58 FunctionPass *createSIOptimizeExecMaskingPreRAPass(); 59 FunctionPass *createSIFixSGPRCopiesPass(); 60 FunctionPass *createSIMemoryLegalizerPass(); 61 FunctionPass *createSIInsertWaitcntsPass(); 62 FunctionPass *createSIPreAllocateWWMRegsPass(); 63 FunctionPass *createSIFormMemoryClausesPass(); 64 65 FunctionPass *createSIPostRABundlerPass(); 66 FunctionPass *createAMDGPUSimplifyLibCallsPass(const TargetMachine *); 67 FunctionPass *createAMDGPUUseNativeCallsPass(); 68 FunctionPass *createAMDGPUCodeGenPreparePass(); 69 FunctionPass *createAMDGPULateCodeGenPreparePass(); 70 FunctionPass *createAMDGPUMachineCFGStructurizerPass(); 71 FunctionPass *createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *); 72 ModulePass *createAMDGPUPropagateAttributesLatePass(const TargetMachine *); 73 FunctionPass *createAMDGPURewriteOutArgumentsPass(); 74 FunctionPass *createSIModeRegisterPass(); 75 76 struct AMDGPUSimplifyLibCallsPass : PassInfoMixin<AMDGPUSimplifyLibCallsPass> { 77 AMDGPUSimplifyLibCallsPass(TargetMachine &TM) : TM(TM) {} 78 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 79 80 private: 81 TargetMachine &TM; 82 }; 83 84 struct AMDGPUUseNativeCallsPass : PassInfoMixin<AMDGPUUseNativeCallsPass> { 85 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 86 }; 87 88 void initializeAMDGPUDAGToDAGISelPass(PassRegistry&); 89 90 void initializeAMDGPUMachineCFGStructurizerPass(PassRegistry&); 91 extern char &AMDGPUMachineCFGStructurizerID; 92 93 void initializeAMDGPUAlwaysInlinePass(PassRegistry&); 94 95 Pass *createAMDGPUAnnotateKernelFeaturesPass(); 96 void initializeAMDGPUAnnotateKernelFeaturesPass(PassRegistry &); 97 extern char &AMDGPUAnnotateKernelFeaturesID; 98 99 FunctionPass *createAMDGPUAtomicOptimizerPass(); 100 void initializeAMDGPUAtomicOptimizerPass(PassRegistry &); 101 extern char &AMDGPUAtomicOptimizerID; 102 103 ModulePass *createAMDGPULowerIntrinsicsPass(); 104 void initializeAMDGPULowerIntrinsicsPass(PassRegistry &); 105 extern char &AMDGPULowerIntrinsicsID; 106 107 ModulePass *createAMDGPUFixFunctionBitcastsPass(); 108 void initializeAMDGPUFixFunctionBitcastsPass(PassRegistry &); 109 extern char &AMDGPUFixFunctionBitcastsID; 110 111 FunctionPass *createAMDGPULowerKernelArgumentsPass(); 112 void initializeAMDGPULowerKernelArgumentsPass(PassRegistry &); 113 extern char &AMDGPULowerKernelArgumentsID; 114 115 ModulePass *createAMDGPULowerKernelAttributesPass(); 116 void initializeAMDGPULowerKernelAttributesPass(PassRegistry &); 117 extern char &AMDGPULowerKernelAttributesID; 118 119 struct AMDGPULowerKernelAttributesPass 120 : PassInfoMixin<AMDGPULowerKernelAttributesPass> { 121 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 122 }; 123 124 void initializeAMDGPUPropagateAttributesEarlyPass(PassRegistry &); 125 extern char &AMDGPUPropagateAttributesEarlyID; 126 127 struct AMDGPUPropagateAttributesEarlyPass 128 : PassInfoMixin<AMDGPUPropagateAttributesEarlyPass> { 129 AMDGPUPropagateAttributesEarlyPass(TargetMachine &TM) : TM(TM) {} 130 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 131 132 private: 133 TargetMachine &TM; 134 }; 135 136 void initializeAMDGPUPropagateAttributesLatePass(PassRegistry &); 137 extern char &AMDGPUPropagateAttributesLateID; 138 139 struct AMDGPUPropagateAttributesLatePass 140 : PassInfoMixin<AMDGPUPropagateAttributesLatePass> { 141 AMDGPUPropagateAttributesLatePass(TargetMachine &TM) : TM(TM) {} 142 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 143 144 private: 145 TargetMachine &TM; 146 }; 147 148 void initializeAMDGPURewriteOutArgumentsPass(PassRegistry &); 149 extern char &AMDGPURewriteOutArgumentsID; 150 151 void initializeGCNDPPCombinePass(PassRegistry &); 152 extern char &GCNDPPCombineID; 153 154 void initializeR600ClauseMergePassPass(PassRegistry &); 155 extern char &R600ClauseMergePassID; 156 157 void initializeR600ControlFlowFinalizerPass(PassRegistry &); 158 extern char &R600ControlFlowFinalizerID; 159 160 void initializeR600ExpandSpecialInstrsPassPass(PassRegistry &); 161 extern char &R600ExpandSpecialInstrsPassID; 162 163 void initializeR600VectorRegMergerPass(PassRegistry &); 164 extern char &R600VectorRegMergerID; 165 166 void initializeR600PacketizerPass(PassRegistry &); 167 extern char &R600PacketizerID; 168 169 void initializeSIFoldOperandsPass(PassRegistry &); 170 extern char &SIFoldOperandsID; 171 172 void initializeSIPeepholeSDWAPass(PassRegistry &); 173 extern char &SIPeepholeSDWAID; 174 175 void initializeSIShrinkInstructionsPass(PassRegistry&); 176 extern char &SIShrinkInstructionsID; 177 178 void initializeSIFixSGPRCopiesPass(PassRegistry &); 179 extern char &SIFixSGPRCopiesID; 180 181 void initializeSIFixVGPRCopiesPass(PassRegistry &); 182 extern char &SIFixVGPRCopiesID; 183 184 void initializeSILowerI1CopiesPass(PassRegistry &); 185 extern char &SILowerI1CopiesID; 186 187 void initializeSILowerSGPRSpillsPass(PassRegistry &); 188 extern char &SILowerSGPRSpillsID; 189 190 void initializeSILoadStoreOptimizerPass(PassRegistry &); 191 extern char &SILoadStoreOptimizerID; 192 193 void initializeSIWholeQuadModePass(PassRegistry &); 194 extern char &SIWholeQuadModeID; 195 196 void initializeSILowerControlFlowPass(PassRegistry &); 197 extern char &SILowerControlFlowID; 198 199 void initializeSIRemoveShortExecBranchesPass(PassRegistry &); 200 extern char &SIRemoveShortExecBranchesID; 201 202 void initializeSIPreEmitPeepholePass(PassRegistry &); 203 extern char &SIPreEmitPeepholeID; 204 205 void initializeSIInsertSkipsPass(PassRegistry &); 206 extern char &SIInsertSkipsPassID; 207 208 void initializeSIOptimizeExecMaskingPass(PassRegistry &); 209 extern char &SIOptimizeExecMaskingID; 210 211 void initializeSIPreAllocateWWMRegsPass(PassRegistry &); 212 extern char &SIPreAllocateWWMRegsID; 213 214 void initializeAMDGPUSimplifyLibCallsPass(PassRegistry &); 215 extern char &AMDGPUSimplifyLibCallsID; 216 217 void initializeAMDGPUUseNativeCallsPass(PassRegistry &); 218 extern char &AMDGPUUseNativeCallsID; 219 220 void initializeSIAddIMGInitPass(PassRegistry &); 221 extern char &SIAddIMGInitID; 222 223 void initializeAMDGPUPerfHintAnalysisPass(PassRegistry &); 224 extern char &AMDGPUPerfHintAnalysisID; 225 226 // Passes common to R600 and SI 227 FunctionPass *createAMDGPUPromoteAlloca(); 228 void initializeAMDGPUPromoteAllocaPass(PassRegistry&); 229 extern char &AMDGPUPromoteAllocaID; 230 231 FunctionPass *createAMDGPUPromoteAllocaToVector(); 232 void initializeAMDGPUPromoteAllocaToVectorPass(PassRegistry&); 233 extern char &AMDGPUPromoteAllocaToVectorID; 234 235 struct AMDGPUPromoteAllocaPass : PassInfoMixin<AMDGPUPromoteAllocaPass> { 236 AMDGPUPromoteAllocaPass(TargetMachine &TM) : TM(TM) {} 237 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 238 239 private: 240 TargetMachine &TM; 241 }; 242 243 struct AMDGPUPromoteAllocaToVectorPass 244 : PassInfoMixin<AMDGPUPromoteAllocaToVectorPass> { 245 AMDGPUPromoteAllocaToVectorPass(TargetMachine &TM) : TM(TM) {} 246 PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); 247 248 private: 249 TargetMachine &TM; 250 }; 251 252 Pass *createAMDGPUStructurizeCFGPass(); 253 FunctionPass *createAMDGPUISelDag( 254 TargetMachine *TM = nullptr, 255 CodeGenOpt::Level OptLevel = CodeGenOpt::Default); 256 ModulePass *createAMDGPUAlwaysInlinePass(bool GlobalOpt = true); 257 258 struct AMDGPUAlwaysInlinePass : PassInfoMixin<AMDGPUAlwaysInlinePass> { 259 AMDGPUAlwaysInlinePass(bool GlobalOpt = true) : GlobalOpt(GlobalOpt) {} 260 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 261 262 private: 263 bool GlobalOpt; 264 }; 265 266 ModulePass *createR600OpenCLImageTypeLoweringPass(); 267 FunctionPass *createAMDGPUAnnotateUniformValues(); 268 269 ModulePass *createAMDGPUPrintfRuntimeBinding(); 270 void initializeAMDGPUPrintfRuntimeBindingPass(PassRegistry&); 271 extern char &AMDGPUPrintfRuntimeBindingID; 272 273 struct AMDGPUPrintfRuntimeBindingPass 274 : PassInfoMixin<AMDGPUPrintfRuntimeBindingPass> { 275 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 276 }; 277 278 ModulePass* createAMDGPUUnifyMetadataPass(); 279 void initializeAMDGPUUnifyMetadataPass(PassRegistry&); 280 extern char &AMDGPUUnifyMetadataID; 281 282 struct AMDGPUUnifyMetadataPass : PassInfoMixin<AMDGPUUnifyMetadataPass> { 283 PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); 284 }; 285 286 void initializeSIOptimizeExecMaskingPreRAPass(PassRegistry&); 287 extern char &SIOptimizeExecMaskingPreRAID; 288 289 void initializeAMDGPUAnnotateUniformValuesPass(PassRegistry&); 290 extern char &AMDGPUAnnotateUniformValuesPassID; 291 292 void initializeAMDGPUCodeGenPreparePass(PassRegistry&); 293 extern char &AMDGPUCodeGenPrepareID; 294 295 void initializeAMDGPULateCodeGenPreparePass(PassRegistry &); 296 extern char &AMDGPULateCodeGenPrepareID; 297 298 void initializeSIAnnotateControlFlowPass(PassRegistry&); 299 extern char &SIAnnotateControlFlowPassID; 300 301 void initializeSIMemoryLegalizerPass(PassRegistry&); 302 extern char &SIMemoryLegalizerID; 303 304 void initializeSIModeRegisterPass(PassRegistry&); 305 extern char &SIModeRegisterID; 306 307 void initializeSIInsertHardClausesPass(PassRegistry &); 308 extern char &SIInsertHardClausesID; 309 310 void initializeSIInsertWaitcntsPass(PassRegistry&); 311 extern char &SIInsertWaitcntsID; 312 313 void initializeSIFormMemoryClausesPass(PassRegistry&); 314 extern char &SIFormMemoryClausesID; 315 316 void initializeSIPostRABundlerPass(PassRegistry&); 317 extern char &SIPostRABundlerID; 318 319 void initializeAMDGPUUnifyDivergentExitNodesPass(PassRegistry&); 320 extern char &AMDGPUUnifyDivergentExitNodesID; 321 322 ImmutablePass *createAMDGPUAAWrapperPass(); 323 void initializeAMDGPUAAWrapperPassPass(PassRegistry&); 324 ImmutablePass *createAMDGPUExternalAAWrapperPass(); 325 void initializeAMDGPUExternalAAWrapperPass(PassRegistry&); 326 327 void initializeAMDGPUArgumentUsageInfoPass(PassRegistry &); 328 329 ModulePass *createAMDGPUOpenCLEnqueuedBlockLoweringPass(); 330 void initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(PassRegistry &); 331 extern char &AMDGPUOpenCLEnqueuedBlockLoweringID; 332 333 void initializeGCNRegBankReassignPass(PassRegistry &); 334 extern char &GCNRegBankReassignID; 335 336 void initializeGCNNSAReassignPass(PassRegistry &); 337 extern char &GCNNSAReassignID; 338 339 namespace AMDGPU { 340 enum TargetIndex { 341 TI_CONSTDATA_START, 342 TI_SCRATCH_RSRC_DWORD0, 343 TI_SCRATCH_RSRC_DWORD1, 344 TI_SCRATCH_RSRC_DWORD2, 345 TI_SCRATCH_RSRC_DWORD3 346 }; 347 } 348 349 /// OpenCL uses address spaces to differentiate between 350 /// various memory regions on the hardware. On the CPU 351 /// all of the address spaces point to the same memory, 352 /// however on the GPU, each address space points to 353 /// a separate piece of memory that is unique from other 354 /// memory locations. 355 namespace AMDGPUAS { 356 enum : unsigned { 357 // The maximum value for flat, generic, local, private, constant and region. 358 MAX_AMDGPU_ADDRESS = 7, 359 360 FLAT_ADDRESS = 0, ///< Address space for flat memory. 361 GLOBAL_ADDRESS = 1, ///< Address space for global memory (RAT0, VTX0). 362 REGION_ADDRESS = 2, ///< Address space for region memory. (GDS) 363 364 CONSTANT_ADDRESS = 4, ///< Address space for constant memory (VTX2). 365 LOCAL_ADDRESS = 3, ///< Address space for local memory. 366 PRIVATE_ADDRESS = 5, ///< Address space for private memory. 367 368 CONSTANT_ADDRESS_32BIT = 6, ///< Address space for 32-bit constant memory. 369 370 BUFFER_FAT_POINTER = 7, ///< Address space for 160-bit buffer fat pointers. 371 372 /// Address space for direct addressible parameter memory (CONST0). 373 PARAM_D_ADDRESS = 6, 374 /// Address space for indirect addressible parameter memory (VTX1). 375 PARAM_I_ADDRESS = 7, 376 377 // Do not re-order the CONSTANT_BUFFER_* enums. Several places depend on 378 // this order to be able to dynamically index a constant buffer, for 379 // example: 380 // 381 // ConstantBufferAS = CONSTANT_BUFFER_0 + CBIdx 382 383 CONSTANT_BUFFER_0 = 8, 384 CONSTANT_BUFFER_1 = 9, 385 CONSTANT_BUFFER_2 = 10, 386 CONSTANT_BUFFER_3 = 11, 387 CONSTANT_BUFFER_4 = 12, 388 CONSTANT_BUFFER_5 = 13, 389 CONSTANT_BUFFER_6 = 14, 390 CONSTANT_BUFFER_7 = 15, 391 CONSTANT_BUFFER_8 = 16, 392 CONSTANT_BUFFER_9 = 17, 393 CONSTANT_BUFFER_10 = 18, 394 CONSTANT_BUFFER_11 = 19, 395 CONSTANT_BUFFER_12 = 20, 396 CONSTANT_BUFFER_13 = 21, 397 CONSTANT_BUFFER_14 = 22, 398 CONSTANT_BUFFER_15 = 23, 399 400 // Some places use this if the address space can't be determined. 401 UNKNOWN_ADDRESS_SPACE = ~0u, 402 }; 403 } 404 405 namespace AMDGPU { 406 407 // FIXME: Missing constant_32bit 408 inline bool isFlatGlobalAddrSpace(unsigned AS) { 409 return AS == AMDGPUAS::GLOBAL_ADDRESS || 410 AS == AMDGPUAS::FLAT_ADDRESS || 411 AS == AMDGPUAS::CONSTANT_ADDRESS || 412 AS > AMDGPUAS::MAX_AMDGPU_ADDRESS; 413 } 414 } 415 416 } // End namespace llvm 417 418 #endif 419