//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief The AMDGPU target machine contains all of the hardware specific
/// information needed to emit code for R600 and SI GPUs.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUTargetMachine.h"
#include "AMDGPU.h"
#include "AMDGPUHSATargetObjectFile.h"
#include "AMDGPUTargetTransformInfo.h"
#include "R600ISelLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineScheduler.h"
#include "SIISelLowering.h"
#include "SIInstrInfo.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_os_ostream.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Scalar.h"

using namespace llvm;

extern "C" void LLVMInitializeAMDGPUTarget() {
  // Register the target
  RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget);
  RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget);

  PassRegistry *PR = PassRegistry::getPassRegistry();
  initializeSILowerI1CopiesPass(*PR);
  initializeSIFoldOperandsPass(*PR);
  initializeSIFixSGPRLiveRangesPass(*PR);
  initializeSIFixControlFlowLiveIntervalsPass(*PR);
  initializeSILoadStoreOptimizerPass(*PR);
}

static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  if (TT.getOS() == Triple::AMDHSA)
    return make_unique<AMDGPUHSATargetObjectFile>();

  return make_unique<TargetLoweringObjectFileELF>();
}

static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>());
}

static MachineSchedRegistry
SchedCustomRegistry("r600", "Run R600's custom scheduler",
                    createR600MachineScheduler);
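// A brief reminder of the DataLayout syntax used below, since the string is
// dense (see the LLVM LangRef for the authoritative description): "e" selects
// little-endian, "pN:<size>:<abi>" gives the size and ABI alignment in bits of
// pointers in address space N ("p" alone is address space 0), "vN:M" sets the
// ABI alignment of N-bit vectors, and "n32:64" declares the native integer
// widths. For example, "-p3:32:32" below says that address space 3 pointers
// are 32 bits wide and 32-bit aligned.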
static std::string computeDataLayout(const Triple &TT) {
  std::string Ret = "e-p:32:32";

  if (TT.getArch() == Triple::amdgcn) {
    // 32-bit private, local, and region pointers. 64-bit global and constant.
    Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64";
  }

  Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256"
         "-v512:512-v1024:1024-v2048:2048-n32:64";

  return Ret;
}

AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                         StringRef CPU, StringRef FS,
                                         TargetOptions Options, Reloc::Model RM,
                                         CodeModel::Model CM,
                                         CodeGenOpt::Level OptLevel)
    : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM,
                        OptLevel),
      TLOF(createTLOF(getTargetTriple())), Subtarget(TT, CPU, FS, *this),
      IntrinsicInfo() {
  setRequiresStructuredCFG(true);
  initAsmInfo();
}

AMDGPUTargetMachine::~AMDGPUTargetMachine() { }

//===----------------------------------------------------------------------===//
// R600 Target Machine (R600 -> Cayman)
//===----------------------------------------------------------------------===//

R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                     StringRef FS, StringRef CPU,
                                     TargetOptions Options, Reloc::Model RM,
                                     CodeModel::Model CM, CodeGenOpt::Level OL)
    : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {}

//===----------------------------------------------------------------------===//
// GCN Target Machine (SI+)
//===----------------------------------------------------------------------===//

GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                   StringRef FS, StringRef CPU,
                                   TargetOptions Options, Reloc::Model RM,
                                   CodeModel::Model CM, CodeGenOpt::Level OL)
    : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {}

//===----------------------------------------------------------------------===//
// AMDGPU Pass Setup
//===----------------------------------------------------------------------===//
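// A quick map of what follows: AMDGPUPassConfig hooks the passes common to
// both generations into the generic codegen pipeline, while R600PassConfig
// and GCNPassConfig layer the R600- and GCN-specific passes on top of it.
// R600TargetMachine::createPassConfig and GCNTargetMachine::createPassConfig
// (defined further down) select which of the two is used for a given
// compilation.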
namespace {
class AMDGPUPassConfig : public TargetPassConfig {
public:
  AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {

    // Exceptions and StackMaps are not supported, so these passes will never
    // do anything.
    disablePass(&StackMapLivenessID);
    disablePass(&FuncletLayoutID);
  }

  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
    if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS)
      return createR600MachineScheduler(C);
    return nullptr;
  }

  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addGCPasses() override;
};

class R600PassConfig : public AMDGPUPassConfig {
public:
  R600PassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  bool addPreISel() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

class GCNPassConfig : public AMDGPUPassConfig {
public:
  GCNPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) { }

  bool addPreISel() override;
  bool addInstSelector() override;
  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

} // End of anonymous namespace

TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() {
  return TargetIRAnalysis([this](const Function &F) {
    return TargetTransformInfo(
        AMDGPUTTIImpl(this, F.getParent()->getDataLayout()));
  });
}

void AMDGPUPassConfig::addIRPasses() {
  // Function calls are not supported, so make sure we inline everything.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerPass());
  // We need to add the barrier noop pass, otherwise adding the function
  // inlining pass will cause all of the PassConfig's passes to be run one
  // function at a time, which means that if we have a module with two
  // functions, we will generate code for the first function without ever
  // running any passes on the second.
  addPass(createBarrierNoopPass());
  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
  addPass(createAMDGPUOpenCLImageTypeLoweringPass());
  TargetPassConfig::addIRPasses();
}
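// addCodeGenPrepare (below) optionally runs AMDGPU's alloca promotion before
// the generic CodeGenPrepare passes. Roughly, AMDGPUPromoteAlloca tries to
// move private-memory allocas into LDS or vector registers (see that pass for
// the details), and SROA is run again afterwards to clean up whatever
// scalarization opportunities that exposes; both are skipped when the
// subtarget disables the feature.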
void AMDGPUPassConfig::addCodeGenPrepare() {
  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
  if (ST.isPromoteAllocaEnabled()) {
    addPass(createAMDGPUPromoteAlloca(ST));
    addPass(createSROAPass());
  }
  TargetPassConfig::addCodeGenPrepare();
}

bool AMDGPUPassConfig::addPreISel() {
  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
  addPass(createFlattenCFGPass());
  if (ST.IsIRStructurizerEnabled())
    addPass(createStructurizeCFGPass());
  return false;
}

bool AMDGPUPassConfig::addInstSelector() {
  addPass(createAMDGPUISelDag(getAMDGPUTargetMachine()));
  return false;
}

bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}

//===----------------------------------------------------------------------===//
// R600 Pass Setup
//===----------------------------------------------------------------------===//

bool R600PassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();
  addPass(createR600TextureIntrinsicsReplacer());
  return false;
}

void R600PassConfig::addPreRegAlloc() {
  addPass(createR600VectorRegMerger(*TM));
}

void R600PassConfig::addPreSched2() {
  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();
  addPass(createR600EmitClauseMarkers(), false);
  if (ST.isIfCvtEnabled())
    addPass(&IfConverterID, false);
  addPass(createR600ClauseMergePass(*TM), false);
}

void R600PassConfig::addPreEmitPass() {
  addPass(createAMDGPUCFGStructurizerPass(), false);
  addPass(createR600ExpandSpecialInstrsPass(*TM), false);
  addPass(&FinalizeMachineBundlesID, false);
  addPass(createR600Packetizer(*TM), false);
  addPass(createR600ControlFlowFinalizer(*TM), false);
}

TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new R600PassConfig(this, PM);
}

//===----------------------------------------------------------------------===//
// GCN Pass Setup
//===----------------------------------------------------------------------===//
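// A note on the pass insertion used below: addPass appends a pass at the
// current point in the pipeline, whereas insertPass(&AnchorID, &PassID) asks
// TargetPassConfig to run the new pass right after the pass identified by
// AnchorID. GCNPassConfig::addPreRegAlloc uses the latter so that, for
// example, the SI load/store optimizer runs after the machine scheduler but
// still before register allocation.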
bool GCNPassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();
  addPass(createSinkingPass());
  addPass(createSITypeRewriter());
  addPass(createSIAnnotateControlFlowPass());
  return false;
}

bool GCNPassConfig::addInstSelector() {
  AMDGPUPassConfig::addInstSelector();
  addPass(createSILowerI1CopiesPass());
  addPass(createSIFixSGPRCopiesPass(*TM));
  addPass(createSIFoldOperandsPass());
  return false;
}

void GCNPassConfig::addPreRegAlloc() {
  const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl();

  // This needs to be run directly before register allocation because earlier
  // passes might recompute live intervals.
  // TODO: handle CodeGenOpt::None; fast RA ignores the spill weights set by
  // the pass.
  if (getOptLevel() > CodeGenOpt::None) {
    insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID);
  }

  if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) {
    // Don't do this with no optimizations since it throws away debug info by
    // merging nonadjacent loads.

    // This should be run after scheduling, but before register allocation. It
    // also needs extra copies to the address operand to be eliminated.
    insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID);
    insertPass(&MachineSchedulerID, &RegisterCoalescerID);
  }
  addPass(createSIShrinkInstructionsPass(), false);
}

void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  addPass(&SIFixSGPRLiveRangesID);
  TargetPassConfig::addFastRegAlloc(RegAllocPass);
}

void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  // We want to run this after LiveVariables is computed to avoid computing it
  // twice.
  // FIXME: We shouldn't disable the verifier here. r249087 introduced a
  // failure that needs to be fixed.
  insertPass(&LiveVariablesID, &SIFixSGPRLiveRangesID, /*VerifyAfter=*/false);
  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}

void GCNPassConfig::addPostRegAlloc() {
  addPass(createSIPrepareScratchRegs(), false);
  addPass(createSIShrinkInstructionsPass(), false);
}

void GCNPassConfig::addPreSched2() {
}

void GCNPassConfig::addPreEmitPass() {
  addPass(createSIInsertWaits(*TM), false);
  addPass(createSILowerControlFlowPass(*TM), false);
}

TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new GCNPassConfig(this, PM);
}
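// A convenient way to see the pipeline these pass configs produce is to ask
// llc for it directly, e.g. (exact pass lists vary with the LLVM version and
// optimization level; input.ll stands in for any IR file):
//
//   llc -march=amdgcn -mcpu=tahiti -debug-pass=Structure input.ll
//   llc -march=r600 -mcpu=cypress -debug-pass=Structure input.ll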