1 //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Top-level implementation for the NVPTX target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "NVPTXTargetMachine.h" 15 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 16 #include "NVPTX.h" 17 #include "NVPTXAllocaHoisting.h" 18 #include "NVPTXLowerAggrCopies.h" 19 #include "NVPTXTargetObjectFile.h" 20 #include "NVPTXTargetTransformInfo.h" 21 #include "llvm/Analysis/Passes.h" 22 #include "llvm/CodeGen/AsmPrinter.h" 23 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 24 #include "llvm/CodeGen/MachineModuleInfo.h" 25 #include "llvm/CodeGen/Passes.h" 26 #include "llvm/IR/DataLayout.h" 27 #include "llvm/IR/IRPrintingPasses.h" 28 #include "llvm/IR/LegacyPassManager.h" 29 #include "llvm/IR/Verifier.h" 30 #include "llvm/MC/MCAsmInfo.h" 31 #include "llvm/MC/MCInstrInfo.h" 32 #include "llvm/MC/MCStreamer.h" 33 #include "llvm/MC/MCSubtargetInfo.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/FormattedStream.h" 37 #include "llvm/Support/TargetRegistry.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "llvm/Target/TargetInstrInfo.h" 40 #include "llvm/Target/TargetLowering.h" 41 #include "llvm/Target/TargetLoweringObjectFile.h" 42 #include "llvm/Target/TargetMachine.h" 43 #include "llvm/Target/TargetOptions.h" 44 #include "llvm/Target/TargetRegisterInfo.h" 45 #include "llvm/Target/TargetSubtargetInfo.h" 46 #include "llvm/Transforms/Scalar.h" 47 48 using namespace llvm; 49 50 namespace llvm { 51 void initializeNVVMReflectPass(PassRegistry&); 52 void initializeGenericToNVVMPass(PassRegistry&); 53 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); 54 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); 55 void initializeNVPTXLowerStructArgsPass(PassRegistry &); 56 } 57 58 extern "C" void LLVMInitializeNVPTXTarget() { 59 // Register the target. 60 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); 61 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); 62 63 // FIXME: This pass is really intended to be invoked during IR optimization, 64 // but it's very NVPTX-specific. 65 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); 66 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); 67 initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); 68 initializeNVPTXFavorNonGenericAddrSpacesPass( 69 *PassRegistry::getPassRegistry()); 70 initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry()); 71 } 72 73 static std::string computeDataLayout(bool is64Bit) { 74 std::string Ret = "e"; 75 76 if (!is64Bit) 77 Ret += "-p:32:32"; 78 79 Ret += "-i64:64-v16:16-v32:32-n16:32:64"; 80 81 return Ret; 82 } 83 84 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, 85 StringRef CPU, StringRef FS, 86 const TargetOptions &Options, 87 Reloc::Model RM, CodeModel::Model CM, 88 CodeGenOpt::Level OL, bool is64bit) 89 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), is64bit(is64bit), 90 TLOF(make_unique<NVPTXTargetObjectFile>()), 91 DL(computeDataLayout(is64bit)), Subtarget(TT, CPU, FS, *this) { 92 if (Triple(TT).getOS() == Triple::NVCL) 93 drvInterface = NVPTX::NVCL; 94 else 95 drvInterface = NVPTX::CUDA; 96 initAsmInfo(); 97 } 98 99 NVPTXTargetMachine::~NVPTXTargetMachine() {} 100 101 void NVPTXTargetMachine32::anchor() {} 102 103 NVPTXTargetMachine32::NVPTXTargetMachine32( 104 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 105 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 106 CodeGenOpt::Level OL) 107 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 108 109 void NVPTXTargetMachine64::anchor() {} 110 111 NVPTXTargetMachine64::NVPTXTargetMachine64( 112 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 113 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 114 CodeGenOpt::Level OL) 115 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 116 117 namespace { 118 class NVPTXPassConfig : public TargetPassConfig { 119 public: 120 NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) 121 : TargetPassConfig(TM, PM) {} 122 123 NVPTXTargetMachine &getNVPTXTargetMachine() const { 124 return getTM<NVPTXTargetMachine>(); 125 } 126 127 void addIRPasses() override; 128 bool addInstSelector() override; 129 void addPostRegAlloc() override; 130 void addMachineSSAOptimization() override; 131 132 FunctionPass *createTargetRegisterAllocator(bool) override; 133 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 134 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 135 }; 136 } // end anonymous namespace 137 138 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 139 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); 140 return PassConfig; 141 } 142 143 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { 144 return TargetIRAnalysis( 145 [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); }); 146 } 147 148 void NVPTXPassConfig::addIRPasses() { 149 // The following passes are known to not play well with virtual regs hanging 150 // around after register allocation (which in our case, is *all* registers). 151 // We explicitly disable them here. We do, however, need some functionality 152 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the 153 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 154 disablePass(&PrologEpilogCodeInserterID); 155 disablePass(&MachineCopyPropagationID); 156 disablePass(&BranchFolderPassID); 157 disablePass(&TailDuplicateID); 158 159 addPass(createNVPTXImageOptimizerPass()); 160 TargetPassConfig::addIRPasses(); 161 addPass(createNVPTXAssignValidGlobalNamesPass()); 162 addPass(createGenericToNVVMPass()); 163 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); 164 addPass(createStraightLineStrengthReducePass()); 165 addPass(createSeparateConstOffsetFromGEPPass()); 166 // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used 167 // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates 168 // significantly better code than EarlyCSE for some of our benchmarks. 169 if (getOptLevel() == CodeGenOpt::Aggressive) 170 addPass(createGVNPass()); 171 else 172 addPass(createEarlyCSEPass()); 173 // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave 174 // some dead code. We could remove dead code in an ad-hoc manner, but that 175 // requires manual work and might be error-prone. 176 // 177 // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, 178 // and leave them unused. 179 // 180 // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the 181 // old index and some of its intermediate results may become unused. 182 addPass(createDeadCodeEliminationPass()); 183 } 184 185 bool NVPTXPassConfig::addInstSelector() { 186 const NVPTXSubtarget &ST = 187 getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); 188 189 addPass(createLowerAggrCopies()); 190 addPass(createAllocaHoisting()); 191 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); 192 193 if (!ST.hasImageHandles()) 194 addPass(createNVPTXReplaceImageHandlesPass()); 195 196 return false; 197 } 198 199 void NVPTXPassConfig::addPostRegAlloc() { 200 addPass(createNVPTXPrologEpilogPass(), false); 201 } 202 203 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 204 return nullptr; // No reg alloc 205 } 206 207 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 208 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 209 addPass(&PHIEliminationID); 210 addPass(&TwoAddressInstructionPassID); 211 } 212 213 void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 214 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 215 216 addPass(&ProcessImplicitDefsID); 217 addPass(&LiveVariablesID); 218 addPass(&MachineLoopInfoID); 219 addPass(&PHIEliminationID); 220 221 addPass(&TwoAddressInstructionPassID); 222 addPass(&RegisterCoalescerID); 223 224 // PreRA instruction scheduling. 225 if (addPass(&MachineSchedulerID)) 226 printAndVerify("After Machine Scheduling"); 227 228 229 addPass(&StackSlotColoringID); 230 231 // FIXME: Needs physical registers 232 //addPass(&PostRAMachineLICMID); 233 234 printAndVerify("After StackSlotColoring"); 235 } 236 237 void NVPTXPassConfig::addMachineSSAOptimization() { 238 // Pre-ra tail duplication. 239 if (addPass(&EarlyTailDuplicateID)) 240 printAndVerify("After Pre-RegAlloc TailDuplicate"); 241 242 // Optimize PHIs before DCE: removing dead PHI cycles may make more 243 // instructions dead. 244 addPass(&OptimizePHIsID); 245 246 // This pass merges large allocas. StackSlotColoring is a different pass 247 // which merges spill slots. 248 addPass(&StackColoringID); 249 250 // If the target requests it, assign local variables to stack slots relative 251 // to one another and simplify frame index references where possible. 252 addPass(&LocalStackSlotAllocationID); 253 254 // With optimization, dead code should already be eliminated. However 255 // there is one known exception: lowered code for arguments that are only 256 // used by tail calls, where the tail calls reuse the incoming stack 257 // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). 258 addPass(&DeadMachineInstructionElimID); 259 printAndVerify("After codegen DCE pass"); 260 261 // Allow targets to insert passes that improve instruction level parallelism, 262 // like if-conversion. Such passes will typically need dominator trees and 263 // loop info, just like LICM and CSE below. 264 if (addILPOpts()) 265 printAndVerify("After ILP optimizations"); 266 267 addPass(&MachineLICMID); 268 addPass(&MachineCSEID); 269 270 addPass(&MachineSinkingID); 271 printAndVerify("After Machine LICM, CSE and Sinking passes"); 272 273 addPass(&PeepholeOptimizerID); 274 printAndVerify("After codegen peephole optimization pass"); 275 } 276