//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief The AMDGPU target machine contains all of the hardware specific
/// information needed to emit code for R600 and SI GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetMachine.h"
#include "AMDGPUHSATargetObjectFile.h"
#include "AMDGPU.h"
#include "AMDGPUTargetTransformInfo.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineScheduler.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"

using namespace llvm;

extern "C" void LLVMInitializeAMDGPUTarget() {
  // Register the target
  RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
  RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);

  PassRegistry *PR = PassRegistry::getPassRegistry();
  initializeSIFixSGPRLiveRangesPass(*PR);
}

static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  if (TT.getOS() == Triple::AMDHSA)
    return make_unique<AMDGPUHSATargetObjectFile>();

  return make_unique<TargetLoweringObjectFileELF>();
}

static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
}

static MachineSchedRegistry
SchedCustomRegistry("r600", "Run R600's custom scheduler",
                    createR600MachineScheduler);

static std::string computeDataLayout(const Triple &TT) {
  std::string Ret = "e-p:32:32";

  if (TT.getArch() == Triple::amdgcn) {
    // 32-bit private, local, and region pointers. 64-bit global and constant.
    Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
  }

  Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
         "-v512:512-v1024:1024-v2048:2048-n32:64";

  return Ret;
}

AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                         StringRef CPU, StringRef FS,
                                         TargetOptions Options, Reloc::Model RM,
                                         CodeModel::Model CM,
                                         CodeGenOpt::Level OptLevel)
    : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
                        OptLevel),
      TLOF(createTLOF(getTargetTriple())), Subtarget(TT, CPU, FS, *this),
      IntrinsicInfo() {
  setRequiresStructuredCFG(true);
  initAsmInfo();
}

AMDGPUTargetMachine::~AMDGPUTargetMachine() { }

//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
//===----------------------------------------------------------------------===//

R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                     StringRef FS, StringRef CPU,
                                     TargetOptions Options, Reloc::Model RM,
                                     CodeModel::Model CM, CodeGenOpt::Level OL)
    : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {}

//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//

GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                   StringRef FS, StringRef CPU,
                                   TargetOptions Options, Reloc::Model RM,
                                   CodeModel::Model CM, CodeGenOpt::Level OL)
    : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {}

//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup
//===----------------------------------------------------------------------===//

namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {

    // Exceptions and StackMaps are not supported, so these passes will never
    // do anything.
    disablePass(&StackMapLivenessID);
    disablePass(&FuncletLayoutID);
  }

  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      return createR600MachineScheduler(C);
    return nullptr;
  }

  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addGCPasses() override;
};

class R600PassConfig : public AMDGPUPassConfig {
public:
  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  bool addPreISel() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

class GCNPassConfig : public AMDGPUPassConfig {
public:
  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }
  bool addPreISel() override;
  bool addInstSelector() override;
  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

} // End of anonymous namespace

TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
  return TargetIRAnalysis([this](const Function &F) {
    return TargetTransformInfo(
        AMDGPUTTIImpl(this, F.getParent()->getDataLayout()));
  });
}

void AMDGPUPassConfig::addIRPasses() {
  // Function calls are not supported, so make sure we inline everything.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerPass());
  // We need to add the barrier noop pass, otherwise adding the function
  // inlining pass will cause all of the PassConfig's passes to be run
  // one function at a time, which means if we have a module with two
  // functions, then we will generate code for the first function
  // without ever running any passes on the second.
  addPass(createBarrierNoopPass());
  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
  addPass(createAMDGPUOpenCLImageTypeLoweringPass());
  TargetPassConfig::addIRPasses();
}

void AMDGPUPassConfig::addCodeGenPrepare() {
  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
  if (ST.isPromoteAllocaEnabled()) {
    addPass(createAMDGPUPromoteAlloca(ST));
    addPass(createSROAPass());
  }
  TargetPassConfig::addCodeGenPrepare();
}

bool
AMDGPUPassConfig::addPreISel() {
  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
  addPass(createFlattenCFGPass());
  if (ST.IsIRStructurizerEnabled())
    addPass(createStructurizeCFGPass());
  return false;
}

bool AMDGPUPassConfig::addInstSelector() {
  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
  return false;
}

bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}

//===----------------------------------------------------------------------===//
// R600 Pass Setup
//===----------------------------------------------------------------------===//

bool R600PassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();
  addPass(createR600TextureIntrinsicsReplacer());
  return false;
}

void R600PassConfig::addPreRegAlloc() {
  addPass(createR600VectorRegMerger(*TM));
}

void R600PassConfig::addPreSched2() {
  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
  addPass(createR600EmitClauseMarkers(), false);
  if (ST.isIfCvtEnabled())
    addPass(&IfConverterID, false);
  addPass(createR600ClauseMergePass(*TM), false);
}

void R600PassConfig::addPreEmitPass() {
  addPass(createAMDGPUCFGStructurizerPass(), false);
  addPass(createR600ExpandSpecialInstrsPass(*TM), false);
  addPass(&FinalizeMachineBundlesID, false);
  addPass(createR600Packetizer(*TM), false);
  addPass(createR600ControlFlowFinalizer(*TM), false);
}

TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new R600PassConfig(this, PM);
}

//===----------------------------------------------------------------------===//
// GCN Pass Setup
//===----------------------------------------------------------------------===//

bool GCNPassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();
  addPass(createSinkingPass());
  addPass(createSITypeRewriter());
  addPass(createSIAnnotateControlFlowPass());
  return false;
}

bool GCNPassConfig::addInstSelector() {
  AMDGPUPassConfig::addInstSelector();
  addPass(createSILowerI1CopiesPass());
  addPass(createSIFixSGPRCopiesPass(*TM));
  addPass(createSIFoldOperandsPass());
  return false;
}

void GCNPassConfig::addPreRegAlloc() {
  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();

  // This needs to be run directly before register allocation because
  // earlier passes might recompute live intervals.
  // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass
  if (getOptLevel() > CodeGenOpt::None) {
    initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry());
    insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
  }

  if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
    // Don't do this with no optimizations since it throws away debug info by
    // merging nonadjacent loads.

    // This should be run after scheduling, but before register allocation. It
    // also needs extra copies to the address operand to be eliminated.
    initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry());
    insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
    insertPass(&MachineSchedulerID, &RegisterCoalescerID);
  }
  addPass(createSIShrinkInstructionsPass(), false);
}

void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  addPass(&SIFixSGPRLiveRangesID);
  TargetPassConfig::addFastRegAlloc(RegAllocPass);
}

void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  // We want to run this after LiveVariables is computed to avoid computing
  // them twice.
  insertPass(&LiveVariablesID, &SIFixSGPRLiveRangesID);
  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}

void GCNPassConfig::addPostRegAlloc() {
  addPass(createSIPrepareScratchRegs(), false);
  addPass(createSIShrinkInstructionsPass(), false);
}

void GCNPassConfig::addPreSched2() {
}

void GCNPassConfig::addPreEmitPass() {
  addPass(createSIInsertWaits(*TM), false);
  addPass(createSILowerControlFlowPass(*TM), false);
}

TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new GCNPassConfig(this, PM);
}