1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief The AMDGPU target machine contains all of the hardware specific 12 /// information needed to emit code for R600 and SI GPUs. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPUTargetMachine.h" 17 #include "AMDGPUHSATargetObjectFile.h" 18 #include "AMDGPU.h" 19 #include "AMDGPUTargetTransformInfo.h" 20 #include "R600ISelLowering.h" 21 #include "R600InstrInfo.h" 22 #include "R600MachineScheduler.h" 23 #include "SIISelLowering.h" 24 #include "SIInstrInfo.h" 25 #include "llvm/Analysis/Passes.h" 26 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28 #include "llvm/CodeGen/MachineModuleInfo.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/IR/Verifier.h" 31 #include "llvm/MC/MCAsmInfo.h" 32 #include "llvm/IR/LegacyPassManager.h" 33 #include "llvm/Support/TargetRegistry.h" 34 #include "llvm/Support/raw_os_ostream.h" 35 #include "llvm/Transforms/IPO.h" 36 #include "llvm/Transforms/Scalar.h" 37 #include <llvm/CodeGen/Passes.h> 38 39 using namespace llvm; 40 41 extern "C" void LLVMInitializeAMDGPUTarget() { 42 // Register the target 43 RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget); 44 RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget); 45 46 PassRegistry *PR = PassRegistry::getPassRegistry(); 47 initializeSILowerI1CopiesPass(*PR); 48 initializeSIFixSGPRCopiesPass(*PR); 49 initializeSIFoldOperandsPass(*PR); 50 initializeSIFixSGPRLiveRangesPass(*PR); 51 initializeSIFixControlFlowLiveIntervalsPass(*PR); 52 initializeSILoadStoreOptimizerPass(*PR); 53 initializeAMDGPUAnnotateKernelFeaturesPass(*PR); 54 } 55 56 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { 57 if (TT.getOS() == Triple::AMDHSA) 58 return make_unique<AMDGPUHSATargetObjectFile>(); 59 60 return make_unique<TargetLoweringObjectFileELF>(); 61 } 62 63 static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { 64 return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>()); 65 } 66 67 static MachineSchedRegistry 68 SchedCustomRegistry("r600", "Run R600's custom scheduler", 69 createR600MachineScheduler); 70 71 static std::string computeDataLayout(const Triple &TT) { 72 std::string Ret = "e-p:32:32"; 73 74 if (TT.getArch() == Triple::amdgcn) { 75 // 32-bit private, local, and region pointers. 64-bit global and constant. 76 Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64"; 77 } 78 79 Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256" 80 "-v512:512-v1024:1024-v2048:2048-n32:64"; 81 82 return Ret; 83 } 84 85 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, 86 StringRef CPU, StringRef FS, 87 TargetOptions Options, Reloc::Model RM, 88 CodeModel::Model CM, 89 CodeGenOpt::Level OptLevel) 90 : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM, 91 OptLevel), 92 TLOF(createTLOF(getTargetTriple())), Subtarget(TT, CPU, FS, *this), 93 IntrinsicInfo() { 94 setRequiresStructuredCFG(true); 95 initAsmInfo(); 96 } 97 98 AMDGPUTargetMachine::~AMDGPUTargetMachine() { } 99 100 //===----------------------------------------------------------------------===// 101 // R600 Target Machine (R600 -> Cayman) 102 //===----------------------------------------------------------------------===// 103 104 R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT, 105 StringRef FS, StringRef CPU, 106 TargetOptions Options, Reloc::Model RM, 107 CodeModel::Model CM, CodeGenOpt::Level OL) 108 : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {} 109 110 //===----------------------------------------------------------------------===// 111 // GCN Target Machine (SI+) 112 //===----------------------------------------------------------------------===// 113 114 GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT, 115 StringRef FS, StringRef CPU, 116 TargetOptions Options, Reloc::Model RM, 117 CodeModel::Model CM, CodeGenOpt::Level OL) 118 : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {} 119 120 //===----------------------------------------------------------------------===// 121 // AMDGPU Pass Setup 122 //===----------------------------------------------------------------------===// 123 124 namespace { 125 class AMDGPUPassConfig : public TargetPassConfig { 126 public: 127 AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM) 128 : TargetPassConfig(TM, PM) { 129 130 // Exceptions and StackMaps are not supported, so these passes will never do 131 // anything. 132 disablePass(&StackMapLivenessID); 133 disablePass(&FuncletLayoutID); 134 } 135 136 AMDGPUTargetMachine &getAMDGPUTargetMachine() const { 137 return getTM<AMDGPUTargetMachine>(); 138 } 139 140 ScheduleDAGInstrs * 141 createMachineScheduler(MachineSchedContext *C) const override { 142 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 143 if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) 144 return createR600MachineScheduler(C); 145 return nullptr; 146 } 147 148 void addIRPasses() override; 149 void addCodeGenPrepare() override; 150 bool addPreISel() override; 151 bool addInstSelector() override; 152 bool addGCPasses() override; 153 }; 154 155 class R600PassConfig : public AMDGPUPassConfig { 156 public: 157 R600PassConfig(TargetMachine *TM, PassManagerBase &PM) 158 : AMDGPUPassConfig(TM, PM) { } 159 160 bool addPreISel() override; 161 void addPreRegAlloc() override; 162 void addPreSched2() override; 163 void addPreEmitPass() override; 164 }; 165 166 class GCNPassConfig : public AMDGPUPassConfig { 167 public: 168 GCNPassConfig(TargetMachine *TM, PassManagerBase &PM) 169 : AMDGPUPassConfig(TM, PM) { } 170 bool addPreISel() override; 171 bool addInstSelector() override; 172 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 173 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 174 void addPreRegAlloc() override; 175 void addPostRegAlloc() override; 176 void addPreSched2() override; 177 void addPreEmitPass() override; 178 }; 179 180 } // End of anonymous namespace 181 182 TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() { 183 return TargetIRAnalysis([this](const Function &F) { 184 return TargetTransformInfo( 185 AMDGPUTTIImpl(this, F.getParent()->getDataLayout())); 186 }); 187 } 188 189 void AMDGPUPassConfig::addIRPasses() { 190 // Function calls are not supported, so make sure we inline everything. 191 addPass(createAMDGPUAlwaysInlinePass()); 192 addPass(createAlwaysInlinerPass()); 193 // We need to add the barrier noop pass, otherwise adding the function 194 // inlining pass will cause all of the PassConfigs passes to be run 195 // one function at a time, which means if we have a nodule with two 196 // functions, then we will generate code for the first function 197 // without ever running any passes on the second. 198 addPass(createBarrierNoopPass()); 199 200 // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. 201 addPass(createAMDGPUOpenCLImageTypeLoweringPass()); 202 203 TargetPassConfig::addIRPasses(); 204 } 205 206 void AMDGPUPassConfig::addCodeGenPrepare() { 207 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 208 if (ST.isPromoteAllocaEnabled()) { 209 addPass(createAMDGPUPromoteAlloca(ST)); 210 addPass(createSROAPass()); 211 } 212 TargetPassConfig::addCodeGenPrepare(); 213 } 214 215 bool 216 AMDGPUPassConfig::addPreISel() { 217 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 218 addPass(createFlattenCFGPass()); 219 if (ST.IsIRStructurizerEnabled()) 220 addPass(createStructurizeCFGPass()); 221 return false; 222 } 223 224 bool AMDGPUPassConfig::addInstSelector() { 225 addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); 226 return false; 227 } 228 229 bool AMDGPUPassConfig::addGCPasses() { 230 // Do nothing. GC is not supported. 231 return false; 232 } 233 234 //===----------------------------------------------------------------------===// 235 // R600 Pass Setup 236 //===----------------------------------------------------------------------===// 237 238 bool R600PassConfig::addPreISel() { 239 AMDGPUPassConfig::addPreISel(); 240 addPass(createR600TextureIntrinsicsReplacer()); 241 return false; 242 } 243 244 void R600PassConfig::addPreRegAlloc() { 245 addPass(createR600VectorRegMerger(*TM)); 246 } 247 248 void R600PassConfig::addPreSched2() { 249 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 250 addPass(createR600EmitClauseMarkers(), false); 251 if (ST.isIfCvtEnabled()) 252 addPass(&IfConverterID, false); 253 addPass(createR600ClauseMergePass(*TM), false); 254 } 255 256 void R600PassConfig::addPreEmitPass() { 257 addPass(createAMDGPUCFGStructurizerPass(), false); 258 addPass(createR600ExpandSpecialInstrsPass(*TM), false); 259 addPass(&FinalizeMachineBundlesID, false); 260 addPass(createR600Packetizer(*TM), false); 261 addPass(createR600ControlFlowFinalizer(*TM), false); 262 } 263 264 TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { 265 return new R600PassConfig(this, PM); 266 } 267 268 //===----------------------------------------------------------------------===// 269 // GCN Pass Setup 270 //===----------------------------------------------------------------------===// 271 272 bool GCNPassConfig::addPreISel() { 273 AMDGPUPassConfig::addPreISel(); 274 275 // FIXME: We need to run a pass to propagate the attributes when calls are 276 // supported. 277 addPass(&AMDGPUAnnotateKernelFeaturesID); 278 279 addPass(createSinkingPass()); 280 addPass(createSITypeRewriter()); 281 addPass(createSIAnnotateControlFlowPass()); 282 return false; 283 } 284 285 bool GCNPassConfig::addInstSelector() { 286 AMDGPUPassConfig::addInstSelector(); 287 addPass(createSILowerI1CopiesPass()); 288 addPass(&SIFixSGPRCopiesID); 289 addPass(createSIFoldOperandsPass()); 290 return false; 291 } 292 293 void GCNPassConfig::addPreRegAlloc() { 294 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 295 296 // This needs to be run directly before register allocation because 297 // earlier passes might recompute live intervals. 298 // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass 299 if (getOptLevel() > CodeGenOpt::None) { 300 insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); 301 } 302 303 if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) { 304 // Don't do this with no optimizations since it throws away debug info by 305 // merging nonadjacent loads. 306 307 // This should be run after scheduling, but before register allocation. It 308 // also need extra copies to the address operand to be eliminated. 309 insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); 310 insertPass(&MachineSchedulerID, &RegisterCoalescerID); 311 } 312 addPass(createSIShrinkInstructionsPass(), false); 313 } 314 315 void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 316 addPass(&SIFixSGPRLiveRangesID); 317 TargetPassConfig::addFastRegAlloc(RegAllocPass); 318 } 319 320 void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 321 // We want to run this after LiveVariables is computed to avoid computing them 322 // twice. 323 // FIXME: We shouldn't disable the verifier here. r249087 introduced a failure 324 // that needs to be fixed. 325 insertPass(&LiveVariablesID, &SIFixSGPRLiveRangesID, /*VerifyAfter=*/false); 326 TargetPassConfig::addOptimizedRegAlloc(RegAllocPass); 327 } 328 329 void GCNPassConfig::addPostRegAlloc() { 330 addPass(createSIPrepareScratchRegs(), false); 331 addPass(createSIShrinkInstructionsPass(), false); 332 } 333 334 void GCNPassConfig::addPreSched2() { 335 } 336 337 void GCNPassConfig::addPreEmitPass() { 338 addPass(createSIInsertWaits(*TM), false); 339 addPass(createSILowerControlFlowPass(*TM), false); 340 } 341 342 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { 343 return new GCNPassConfig(this, PM); 344 } 345