1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief The AMDGPU target machine contains all of the hardware specific 12 /// information needed to emit code for R600 and SI GPUs. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPUTargetMachine.h" 17 #include "AMDGPUHSATargetObjectFile.h" 18 #include "AMDGPU.h" 19 #include "AMDGPUTargetTransformInfo.h" 20 #include "R600ISelLowering.h" 21 #include "R600InstrInfo.h" 22 #include "R600MachineScheduler.h" 23 #include "SIISelLowering.h" 24 #include "SIInstrInfo.h" 25 #include "llvm/Analysis/Passes.h" 26 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28 #include "llvm/CodeGen/MachineModuleInfo.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/IR/Verifier.h" 31 #include "llvm/MC/MCAsmInfo.h" 32 #include "llvm/IR/LegacyPassManager.h" 33 #include "llvm/Support/TargetRegistry.h" 34 #include "llvm/Support/raw_os_ostream.h" 35 #include "llvm/Transforms/IPO.h" 36 #include "llvm/Transforms/Scalar.h" 37 #include <llvm/CodeGen/Passes.h> 38 39 using namespace llvm; 40 41 extern "C" void LLVMInitializeAMDGPUTarget() { 42 // Register the target 43 RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget); 44 RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget); 45 46 PassRegistry *PR = PassRegistry::getPassRegistry(); 47 initializeSILowerI1CopiesPass(*PR); 48 initializeSIFixSGPRCopiesPass(*PR); 49 initializeSIFoldOperandsPass(*PR); 50 initializeSIFixSGPRLiveRangesPass(*PR); 51 initializeSIFixControlFlowLiveIntervalsPass(*PR); 52 initializeSILoadStoreOptimizerPass(*PR); 53 } 54 55 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { 56 if (TT.getOS() == Triple::AMDHSA) 57 return make_unique<AMDGPUHSATargetObjectFile>(); 58 59 return make_unique<TargetLoweringObjectFileELF>(); 60 } 61 62 static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { 63 return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>()); 64 } 65 66 static MachineSchedRegistry 67 SchedCustomRegistry("r600", "Run R600's custom scheduler", 68 createR600MachineScheduler); 69 70 static std::string computeDataLayout(const Triple &TT) { 71 std::string Ret = "e-p:32:32"; 72 73 if (TT.getArch() == Triple::amdgcn) { 74 // 32-bit private, local, and region pointers. 64-bit global and constant. 75 Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64"; 76 } 77 78 Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256" 79 "-v512:512-v1024:1024-v2048:2048-n32:64"; 80 81 return Ret; 82 } 83 84 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, 85 StringRef CPU, StringRef FS, 86 TargetOptions Options, Reloc::Model RM, 87 CodeModel::Model CM, 88 CodeGenOpt::Level OptLevel) 89 : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM, 90 OptLevel), 91 TLOF(createTLOF(getTargetTriple())), Subtarget(TT, CPU, FS, *this), 92 IntrinsicInfo() { 93 setRequiresStructuredCFG(true); 94 initAsmInfo(); 95 } 96 97 AMDGPUTargetMachine::~AMDGPUTargetMachine() { } 98 99 //===----------------------------------------------------------------------===// 100 // R600 Target Machine (R600 -> Cayman) 101 //===----------------------------------------------------------------------===// 102 103 R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT, 104 StringRef FS, StringRef CPU, 105 TargetOptions Options, Reloc::Model RM, 106 CodeModel::Model CM, CodeGenOpt::Level OL) 107 : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {} 108 109 //===----------------------------------------------------------------------===// 110 // GCN Target Machine (SI+) 111 //===----------------------------------------------------------------------===// 112 113 GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT, 114 StringRef FS, StringRef CPU, 115 TargetOptions Options, Reloc::Model RM, 116 CodeModel::Model CM, CodeGenOpt::Level OL) 117 : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {} 118 119 //===----------------------------------------------------------------------===// 120 // AMDGPU Pass Setup 121 //===----------------------------------------------------------------------===// 122 123 namespace { 124 class AMDGPUPassConfig : public TargetPassConfig { 125 public: 126 AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM) 127 : TargetPassConfig(TM, PM) { 128 129 // Exceptions and StackMaps are not supported, so these passes will never do 130 // anything. 131 disablePass(&StackMapLivenessID); 132 disablePass(&FuncletLayoutID); 133 } 134 135 AMDGPUTargetMachine &getAMDGPUTargetMachine() const { 136 return getTM<AMDGPUTargetMachine>(); 137 } 138 139 ScheduleDAGInstrs * 140 createMachineScheduler(MachineSchedContext *C) const override { 141 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 142 if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) 143 return createR600MachineScheduler(C); 144 return nullptr; 145 } 146 147 void addIRPasses() override; 148 void addCodeGenPrepare() override; 149 bool addPreISel() override; 150 bool addInstSelector() override; 151 bool addGCPasses() override; 152 }; 153 154 class R600PassConfig : public AMDGPUPassConfig { 155 public: 156 R600PassConfig(TargetMachine *TM, PassManagerBase &PM) 157 : AMDGPUPassConfig(TM, PM) { } 158 159 bool addPreISel() override; 160 void addPreRegAlloc() override; 161 void addPreSched2() override; 162 void addPreEmitPass() override; 163 }; 164 165 class GCNPassConfig : public AMDGPUPassConfig { 166 public: 167 GCNPassConfig(TargetMachine *TM, PassManagerBase &PM) 168 : AMDGPUPassConfig(TM, PM) { } 169 bool addPreISel() override; 170 bool addInstSelector() override; 171 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 172 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 173 void addPreRegAlloc() override; 174 void addPostRegAlloc() override; 175 void addPreSched2() override; 176 void addPreEmitPass() override; 177 }; 178 179 } // End of anonymous namespace 180 181 TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() { 182 return TargetIRAnalysis([this](const Function &F) { 183 return TargetTransformInfo( 184 AMDGPUTTIImpl(this, F.getParent()->getDataLayout())); 185 }); 186 } 187 188 void AMDGPUPassConfig::addIRPasses() { 189 // Function calls are not supported, so make sure we inline everything. 190 addPass(createAMDGPUAlwaysInlinePass()); 191 addPass(createAlwaysInlinerPass()); 192 // We need to add the barrier noop pass, otherwise adding the function 193 // inlining pass will cause all of the PassConfigs passes to be run 194 // one function at a time, which means if we have a nodule with two 195 // functions, then we will generate code for the first function 196 // without ever running any passes on the second. 197 addPass(createBarrierNoopPass()); 198 // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. 199 addPass(createAMDGPUOpenCLImageTypeLoweringPass()); 200 TargetPassConfig::addIRPasses(); 201 } 202 203 void AMDGPUPassConfig::addCodeGenPrepare() { 204 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 205 if (ST.isPromoteAllocaEnabled()) { 206 addPass(createAMDGPUPromoteAlloca(ST)); 207 addPass(createSROAPass()); 208 } 209 TargetPassConfig::addCodeGenPrepare(); 210 } 211 212 bool 213 AMDGPUPassConfig::addPreISel() { 214 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 215 addPass(createFlattenCFGPass()); 216 if (ST.IsIRStructurizerEnabled()) 217 addPass(createStructurizeCFGPass()); 218 return false; 219 } 220 221 bool AMDGPUPassConfig::addInstSelector() { 222 addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); 223 return false; 224 } 225 226 bool AMDGPUPassConfig::addGCPasses() { 227 // Do nothing. GC is not supported. 228 return false; 229 } 230 231 //===----------------------------------------------------------------------===// 232 // R600 Pass Setup 233 //===----------------------------------------------------------------------===// 234 235 bool R600PassConfig::addPreISel() { 236 AMDGPUPassConfig::addPreISel(); 237 addPass(createR600TextureIntrinsicsReplacer()); 238 return false; 239 } 240 241 void R600PassConfig::addPreRegAlloc() { 242 addPass(createR600VectorRegMerger(*TM)); 243 } 244 245 void R600PassConfig::addPreSched2() { 246 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 247 addPass(createR600EmitClauseMarkers(), false); 248 if (ST.isIfCvtEnabled()) 249 addPass(&IfConverterID, false); 250 addPass(createR600ClauseMergePass(*TM), false); 251 } 252 253 void R600PassConfig::addPreEmitPass() { 254 addPass(createAMDGPUCFGStructurizerPass(), false); 255 addPass(createR600ExpandSpecialInstrsPass(*TM), false); 256 addPass(&FinalizeMachineBundlesID, false); 257 addPass(createR600Packetizer(*TM), false); 258 addPass(createR600ControlFlowFinalizer(*TM), false); 259 } 260 261 TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { 262 return new R600PassConfig(this, PM); 263 } 264 265 //===----------------------------------------------------------------------===// 266 // GCN Pass Setup 267 //===----------------------------------------------------------------------===// 268 269 bool GCNPassConfig::addPreISel() { 270 AMDGPUPassConfig::addPreISel(); 271 addPass(createSinkingPass()); 272 addPass(createSITypeRewriter()); 273 addPass(createSIAnnotateControlFlowPass()); 274 return false; 275 } 276 277 bool GCNPassConfig::addInstSelector() { 278 AMDGPUPassConfig::addInstSelector(); 279 addPass(createSILowerI1CopiesPass()); 280 addPass(&SIFixSGPRCopiesID); 281 addPass(createSIFoldOperandsPass()); 282 return false; 283 } 284 285 void GCNPassConfig::addPreRegAlloc() { 286 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 287 288 // This needs to be run directly before register allocation because 289 // earlier passes might recompute live intervals. 290 // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass 291 if (getOptLevel() > CodeGenOpt::None) { 292 insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); 293 } 294 295 if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) { 296 // Don't do this with no optimizations since it throws away debug info by 297 // merging nonadjacent loads. 298 299 // This should be run after scheduling, but before register allocation. It 300 // also need extra copies to the address operand to be eliminated. 301 insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); 302 insertPass(&MachineSchedulerID, &RegisterCoalescerID); 303 } 304 addPass(createSIShrinkInstructionsPass(), false); 305 } 306 307 void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 308 addPass(&SIFixSGPRLiveRangesID); 309 TargetPassConfig::addFastRegAlloc(RegAllocPass); 310 } 311 312 void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 313 // We want to run this after LiveVariables is computed to avoid computing them 314 // twice. 315 // FIXME: We shouldn't disable the verifier here. r249087 introduced a failure 316 // that needs to be fixed. 317 insertPass(&LiveVariablesID, &SIFixSGPRLiveRangesID, /*VerifyAfter=*/false); 318 TargetPassConfig::addOptimizedRegAlloc(RegAllocPass); 319 } 320 321 void GCNPassConfig::addPostRegAlloc() { 322 addPass(createSIPrepareScratchRegs(), false); 323 addPass(createSIShrinkInstructionsPass(), false); 324 } 325 326 void GCNPassConfig::addPreSched2() { 327 } 328 329 void GCNPassConfig::addPreEmitPass() { 330 addPass(createSIInsertWaits(*TM), false); 331 addPass(createSILowerControlFlowPass(*TM), false); 332 } 333 334 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { 335 return new GCNPassConfig(this, PM); 336 } 337