1 //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief The AMDGPU target machine contains all of the hardware specific 12 /// information needed to emit code for R600 and SI GPUs. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPUTargetMachine.h" 17 #include "AMDGPUHSATargetObjectFile.h" 18 #include "AMDGPU.h" 19 #include "AMDGPUTargetTransformInfo.h" 20 #include "R600ISelLowering.h" 21 #include "R600InstrInfo.h" 22 #include "R600MachineScheduler.h" 23 #include "SIISelLowering.h" 24 #include "SIInstrInfo.h" 25 #include "llvm/Analysis/Passes.h" 26 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 27 #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" 28 #include "llvm/CodeGen/MachineModuleInfo.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/IR/Verifier.h" 31 #include "llvm/MC/MCAsmInfo.h" 32 #include "llvm/IR/LegacyPassManager.h" 33 #include "llvm/Support/TargetRegistry.h" 34 #include "llvm/Support/raw_os_ostream.h" 35 #include "llvm/Transforms/IPO.h" 36 #include "llvm/Transforms/Scalar.h" 37 #include <llvm/CodeGen/Passes.h> 38 39 using namespace llvm; 40 41 extern "C" void LLVMInitializeAMDGPUTarget() { 42 // Register the target 43 RegisterTargetMachine<R600TargetMachine> X(TheAMDGPUTarget); 44 RegisterTargetMachine<GCNTargetMachine> Y(TheGCNTarget); 45 } 46 47 static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) { 48 if (TT.getOS() == Triple::AMDHSA) 49 return make_unique<AMDGPUHSATargetObjectFile>(); 50 51 return make_unique<TargetLoweringObjectFileELF>(); 52 } 53 54 static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { 55 return new ScheduleDAGMILive(C, make_unique<R600SchedStrategy>()); 56 } 57 58 static MachineSchedRegistry 59 SchedCustomRegistry("r600", "Run R600's custom scheduler", 60 createR600MachineScheduler); 61 62 static std::string computeDataLayout(const Triple &TT) { 63 std::string Ret = "e-p:32:32"; 64 65 if (TT.getArch() == Triple::amdgcn) { 66 // 32-bit private, local, and region pointers. 64-bit global and constant. 67 Ret += "-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64"; 68 } 69 70 Ret += "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256" 71 "-v512:512-v1024:1024-v2048:2048-n32:64"; 72 73 return Ret; 74 } 75 76 AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT, 77 StringRef CPU, StringRef FS, 78 TargetOptions Options, Reloc::Model RM, 79 CodeModel::Model CM, 80 CodeGenOpt::Level OptLevel) 81 : LLVMTargetMachine(T, computeDataLayout(TT), TT, CPU, FS, Options, RM, CM, 82 OptLevel), 83 TLOF(createTLOF(getTargetTriple())), Subtarget(TT, CPU, FS, *this), 84 IntrinsicInfo() { 85 setRequiresStructuredCFG(true); 86 initAsmInfo(); 87 } 88 89 AMDGPUTargetMachine::~AMDGPUTargetMachine() { } 90 91 //===----------------------------------------------------------------------===// 92 // R600 Target Machine (R600 -> Cayman) 93 //===----------------------------------------------------------------------===// 94 95 R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT, 96 StringRef FS, StringRef CPU, 97 TargetOptions Options, Reloc::Model RM, 98 CodeModel::Model CM, CodeGenOpt::Level OL) 99 : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {} 100 101 //===----------------------------------------------------------------------===// 102 // GCN Target Machine (SI+) 103 //===----------------------------------------------------------------------===// 104 105 GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT, 106 StringRef FS, StringRef CPU, 107 TargetOptions Options, Reloc::Model RM, 108 CodeModel::Model CM, CodeGenOpt::Level OL) 109 : AMDGPUTargetMachine(T, TT, FS, CPU, Options, RM, CM, OL) {} 110 111 //===----------------------------------------------------------------------===// 112 // AMDGPU Pass Setup 113 //===----------------------------------------------------------------------===// 114 115 namespace { 116 class AMDGPUPassConfig : public TargetPassConfig { 117 public: 118 AMDGPUPassConfig(TargetMachine *TM, PassManagerBase &PM) 119 : TargetPassConfig(TM, PM) { 120 121 // Exceptions and StackMaps are not supported, so these passes will never do 122 // anything. 123 disablePass(&StackMapLivenessID); 124 disablePass(&FuncletLayoutID); 125 } 126 127 AMDGPUTargetMachine &getAMDGPUTargetMachine() const { 128 return getTM<AMDGPUTargetMachine>(); 129 } 130 131 ScheduleDAGInstrs * 132 createMachineScheduler(MachineSchedContext *C) const override { 133 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 134 if (ST.getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) 135 return createR600MachineScheduler(C); 136 return nullptr; 137 } 138 139 void addIRPasses() override; 140 void addCodeGenPrepare() override; 141 bool addPreISel() override; 142 bool addInstSelector() override; 143 bool addGCPasses() override; 144 }; 145 146 class R600PassConfig : public AMDGPUPassConfig { 147 public: 148 R600PassConfig(TargetMachine *TM, PassManagerBase &PM) 149 : AMDGPUPassConfig(TM, PM) { } 150 151 bool addPreISel() override; 152 void addPreRegAlloc() override; 153 void addPreSched2() override; 154 void addPreEmitPass() override; 155 }; 156 157 class GCNPassConfig : public AMDGPUPassConfig { 158 public: 159 GCNPassConfig(TargetMachine *TM, PassManagerBase &PM) 160 : AMDGPUPassConfig(TM, PM) { } 161 bool addPreISel() override; 162 bool addInstSelector() override; 163 void addPreRegAlloc() override; 164 void addPostRegAlloc() override; 165 void addPreSched2() override; 166 void addPreEmitPass() override; 167 }; 168 169 } // End of anonymous namespace 170 171 TargetIRAnalysis AMDGPUTargetMachine::getTargetIRAnalysis() { 172 return TargetIRAnalysis([this](const Function &F) { 173 return TargetTransformInfo( 174 AMDGPUTTIImpl(this, F.getParent()->getDataLayout())); 175 }); 176 } 177 178 void AMDGPUPassConfig::addIRPasses() { 179 // Function calls are not supported, so make sure we inline everything. 180 addPass(createAMDGPUAlwaysInlinePass()); 181 addPass(createAlwaysInlinerPass()); 182 // We need to add the barrier noop pass, otherwise adding the function 183 // inlining pass will cause all of the PassConfigs passes to be run 184 // one function at a time, which means if we have a nodule with two 185 // functions, then we will generate code for the first function 186 // without ever running any passes on the second. 187 addPass(createBarrierNoopPass()); 188 // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. 189 addPass(createAMDGPUOpenCLImageTypeLoweringPass()); 190 TargetPassConfig::addIRPasses(); 191 } 192 193 void AMDGPUPassConfig::addCodeGenPrepare() { 194 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 195 if (ST.isPromoteAllocaEnabled()) { 196 addPass(createAMDGPUPromoteAlloca(ST)); 197 addPass(createSROAPass()); 198 } 199 TargetPassConfig::addCodeGenPrepare(); 200 } 201 202 bool 203 AMDGPUPassConfig::addPreISel() { 204 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 205 addPass(createFlattenCFGPass()); 206 if (ST.IsIRStructurizerEnabled()) 207 addPass(createStructurizeCFGPass()); 208 return false; 209 } 210 211 bool AMDGPUPassConfig::addInstSelector() { 212 addPass(createAMDGPUISelDag(getAMDGPUTargetMachine())); 213 return false; 214 } 215 216 bool AMDGPUPassConfig::addGCPasses() { 217 // Do nothing. GC is not supported. 218 return false; 219 } 220 221 //===----------------------------------------------------------------------===// 222 // R600 Pass Setup 223 //===----------------------------------------------------------------------===// 224 225 bool R600PassConfig::addPreISel() { 226 AMDGPUPassConfig::addPreISel(); 227 addPass(createR600TextureIntrinsicsReplacer()); 228 return false; 229 } 230 231 void R600PassConfig::addPreRegAlloc() { 232 addPass(createR600VectorRegMerger(*TM)); 233 } 234 235 void R600PassConfig::addPreSched2() { 236 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 237 addPass(createR600EmitClauseMarkers(), false); 238 if (ST.isIfCvtEnabled()) 239 addPass(&IfConverterID, false); 240 addPass(createR600ClauseMergePass(*TM), false); 241 } 242 243 void R600PassConfig::addPreEmitPass() { 244 addPass(createAMDGPUCFGStructurizerPass(), false); 245 addPass(createR600ExpandSpecialInstrsPass(*TM), false); 246 addPass(&FinalizeMachineBundlesID, false); 247 addPass(createR600Packetizer(*TM), false); 248 addPass(createR600ControlFlowFinalizer(*TM), false); 249 } 250 251 TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) { 252 return new R600PassConfig(this, PM); 253 } 254 255 //===----------------------------------------------------------------------===// 256 // GCN Pass Setup 257 //===----------------------------------------------------------------------===// 258 259 bool GCNPassConfig::addPreISel() { 260 AMDGPUPassConfig::addPreISel(); 261 addPass(createSinkingPass()); 262 addPass(createSITypeRewriter()); 263 addPass(createSIAnnotateControlFlowPass()); 264 return false; 265 } 266 267 bool GCNPassConfig::addInstSelector() { 268 AMDGPUPassConfig::addInstSelector(); 269 addPass(createSILowerI1CopiesPass()); 270 addPass(createSIFixSGPRCopiesPass(*TM)); 271 addPass(createSIFoldOperandsPass()); 272 return false; 273 } 274 275 void GCNPassConfig::addPreRegAlloc() { 276 const AMDGPUSubtarget &ST = *getAMDGPUTargetMachine().getSubtargetImpl(); 277 278 // This needs to be run directly before register allocation because 279 // earlier passes might recompute live intervals. 280 // TODO: handle CodeGenOpt::None; fast RA ignores spill weights set by the pass 281 if (getOptLevel() > CodeGenOpt::None) { 282 initializeSIFixControlFlowLiveIntervalsPass(*PassRegistry::getPassRegistry()); 283 insertPass(&MachineSchedulerID, &SIFixControlFlowLiveIntervalsID); 284 } 285 286 if (getOptLevel() > CodeGenOpt::None && ST.loadStoreOptEnabled()) { 287 // Don't do this with no optimizations since it throws away debug info by 288 // merging nonadjacent loads. 289 290 // This should be run after scheduling, but before register allocation. It 291 // also need extra copies to the address operand to be eliminated. 292 initializeSILoadStoreOptimizerPass(*PassRegistry::getPassRegistry()); 293 insertPass(&MachineSchedulerID, &SILoadStoreOptimizerID); 294 insertPass(&MachineSchedulerID, &RegisterCoalescerID); 295 } 296 addPass(createSIShrinkInstructionsPass(), false); 297 addPass(createSIFixSGPRLiveRangesPass()); 298 } 299 300 void GCNPassConfig::addPostRegAlloc() { 301 addPass(createSIPrepareScratchRegs(), false); 302 addPass(createSIShrinkInstructionsPass(), false); 303 } 304 305 void GCNPassConfig::addPreSched2() { 306 } 307 308 void GCNPassConfig::addPreEmitPass() { 309 addPass(createSIInsertWaits(*TM), false); 310 addPass(createSILowerControlFlowPass(*TM), false); 311 } 312 313 TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) { 314 return new GCNPassConfig(this, PM); 315 } 316