1 //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Top-level implementation for the NVPTX target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "NVPTXTargetMachine.h" 15 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 16 #include "NVPTX.h" 17 #include "NVPTXAllocaHoisting.h" 18 #include "NVPTXLowerAggrCopies.h" 19 #include "NVPTXTargetObjectFile.h" 20 #include "NVPTXTargetTransformInfo.h" 21 #include "llvm/Analysis/Passes.h" 22 #include "llvm/CodeGen/AsmPrinter.h" 23 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 24 #include "llvm/CodeGen/MachineModuleInfo.h" 25 #include "llvm/CodeGen/Passes.h" 26 #include "llvm/IR/DataLayout.h" 27 #include "llvm/IR/IRPrintingPasses.h" 28 #include "llvm/IR/LegacyPassManager.h" 29 #include "llvm/IR/Verifier.h" 30 #include "llvm/MC/MCAsmInfo.h" 31 #include "llvm/MC/MCInstrInfo.h" 32 #include "llvm/MC/MCStreamer.h" 33 #include "llvm/MC/MCSubtargetInfo.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/FormattedStream.h" 37 #include "llvm/Support/TargetRegistry.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include "llvm/Target/TargetInstrInfo.h" 40 #include "llvm/Target/TargetLowering.h" 41 #include "llvm/Target/TargetLoweringObjectFile.h" 42 #include "llvm/Target/TargetMachine.h" 43 #include "llvm/Target/TargetOptions.h" 44 #include "llvm/Target/TargetRegisterInfo.h" 45 #include "llvm/Target/TargetSubtargetInfo.h" 46 #include "llvm/Transforms/Scalar.h" 47 48 using namespace llvm; 49 50 namespace llvm { 51 void initializeNVVMReflectPass(PassRegistry&); 52 void initializeGenericToNVVMPass(PassRegistry&); 53 void initializeNVPTXAllocaHoistingPass(PassRegistry &); 54 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); 55 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); 56 void initializeNVPTXLowerStructArgsPass(PassRegistry &); 57 } 58 59 extern "C" void LLVMInitializeNVPTXTarget() { 60 // Register the target. 61 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); 62 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); 63 64 // FIXME: This pass is really intended to be invoked during IR optimization, 65 // but it's very NVPTX-specific. 66 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); 67 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); 68 initializeNVPTXAllocaHoistingPass(*PassRegistry::getPassRegistry()); 69 initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); 70 initializeNVPTXFavorNonGenericAddrSpacesPass( 71 *PassRegistry::getPassRegistry()); 72 initializeNVPTXLowerStructArgsPass(*PassRegistry::getPassRegistry()); 73 } 74 75 static std::string computeDataLayout(bool is64Bit) { 76 std::string Ret = "e"; 77 78 if (!is64Bit) 79 Ret += "-p:32:32"; 80 81 Ret += "-i64:64-v16:16-v32:32-n16:32:64"; 82 83 return Ret; 84 } 85 86 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, 87 StringRef CPU, StringRef FS, 88 const TargetOptions &Options, 89 Reloc::Model RM, CodeModel::Model CM, 90 CodeGenOpt::Level OL, bool is64bit) 91 : LLVMTargetMachine(T, computeDataLayout(is64bit), TT, CPU, FS, Options, RM, 92 CM, OL), 93 is64bit(is64bit), TLOF(make_unique<NVPTXTargetObjectFile>()), 94 Subtarget(TT, CPU, FS, *this) { 95 if (Triple(TT).getOS() == Triple::NVCL) 96 drvInterface = NVPTX::NVCL; 97 else 98 drvInterface = NVPTX::CUDA; 99 initAsmInfo(); 100 } 101 102 NVPTXTargetMachine::~NVPTXTargetMachine() {} 103 104 void NVPTXTargetMachine32::anchor() {} 105 106 NVPTXTargetMachine32::NVPTXTargetMachine32( 107 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 108 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 109 CodeGenOpt::Level OL) 110 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 111 112 void NVPTXTargetMachine64::anchor() {} 113 114 NVPTXTargetMachine64::NVPTXTargetMachine64( 115 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 116 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 117 CodeGenOpt::Level OL) 118 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 119 120 namespace { 121 class NVPTXPassConfig : public TargetPassConfig { 122 public: 123 NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) 124 : TargetPassConfig(TM, PM) {} 125 126 NVPTXTargetMachine &getNVPTXTargetMachine() const { 127 return getTM<NVPTXTargetMachine>(); 128 } 129 130 void addIRPasses() override; 131 bool addInstSelector() override; 132 void addPostRegAlloc() override; 133 void addMachineSSAOptimization() override; 134 135 FunctionPass *createTargetRegisterAllocator(bool) override; 136 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 137 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 138 }; 139 } // end anonymous namespace 140 141 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 142 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); 143 return PassConfig; 144 } 145 146 TargetIRAnalysis NVPTXTargetMachine::getTargetIRAnalysis() { 147 return TargetIRAnalysis( 148 [this](Function &) { return TargetTransformInfo(NVPTXTTIImpl(this)); }); 149 } 150 151 void NVPTXPassConfig::addIRPasses() { 152 // The following passes are known to not play well with virtual regs hanging 153 // around after register allocation (which in our case, is *all* registers). 154 // We explicitly disable them here. We do, however, need some functionality 155 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the 156 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 157 disablePass(&PrologEpilogCodeInserterID); 158 disablePass(&MachineCopyPropagationID); 159 disablePass(&BranchFolderPassID); 160 disablePass(&TailDuplicateID); 161 162 addPass(createNVPTXImageOptimizerPass()); 163 TargetPassConfig::addIRPasses(); 164 addPass(createNVPTXAssignValidGlobalNamesPass()); 165 addPass(createGenericToNVVMPass()); 166 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); 167 // FavorNonGenericAddrSpaces shortcuts unnecessary addrspacecasts, and leave 168 // them unused. We could remove dead code in an ad-hoc manner, but that 169 // requires manual work and might be error-prone. 170 addPass(createDeadCodeEliminationPass()); 171 addPass(createSeparateConstOffsetFromGEPPass()); 172 // ReassociateGEPs exposes more opportunites for SLSR. See 173 // the example in reassociate-geps-and-slsr.ll. 174 addPass(createStraightLineStrengthReducePass()); 175 // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or 176 // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE 177 // for some of our benchmarks. 178 if (getOptLevel() == CodeGenOpt::Aggressive) 179 addPass(createGVNPass()); 180 else 181 addPass(createEarlyCSEPass()); 182 // Run NaryReassociate after EarlyCSE/GVN to be more effective. 183 addPass(createNaryReassociatePass()); 184 } 185 186 bool NVPTXPassConfig::addInstSelector() { 187 const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl(); 188 189 addPass(createLowerAggrCopies()); 190 addPass(createAllocaHoisting()); 191 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); 192 193 if (!ST.hasImageHandles()) 194 addPass(createNVPTXReplaceImageHandlesPass()); 195 196 return false; 197 } 198 199 void NVPTXPassConfig::addPostRegAlloc() { 200 addPass(createNVPTXPrologEpilogPass(), false); 201 } 202 203 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 204 return nullptr; // No reg alloc 205 } 206 207 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 208 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 209 addPass(&PHIEliminationID); 210 addPass(&TwoAddressInstructionPassID); 211 } 212 213 void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 214 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 215 216 addPass(&ProcessImplicitDefsID); 217 addPass(&LiveVariablesID); 218 addPass(&MachineLoopInfoID); 219 addPass(&PHIEliminationID); 220 221 addPass(&TwoAddressInstructionPassID); 222 addPass(&RegisterCoalescerID); 223 224 // PreRA instruction scheduling. 225 if (addPass(&MachineSchedulerID)) 226 printAndVerify("After Machine Scheduling"); 227 228 229 addPass(&StackSlotColoringID); 230 231 // FIXME: Needs physical registers 232 //addPass(&PostRAMachineLICMID); 233 234 printAndVerify("After StackSlotColoring"); 235 } 236 237 void NVPTXPassConfig::addMachineSSAOptimization() { 238 // Pre-ra tail duplication. 239 if (addPass(&EarlyTailDuplicateID)) 240 printAndVerify("After Pre-RegAlloc TailDuplicate"); 241 242 // Optimize PHIs before DCE: removing dead PHI cycles may make more 243 // instructions dead. 244 addPass(&OptimizePHIsID); 245 246 // This pass merges large allocas. StackSlotColoring is a different pass 247 // which merges spill slots. 248 addPass(&StackColoringID); 249 250 // If the target requests it, assign local variables to stack slots relative 251 // to one another and simplify frame index references where possible. 252 addPass(&LocalStackSlotAllocationID); 253 254 // With optimization, dead code should already be eliminated. However 255 // there is one known exception: lowered code for arguments that are only 256 // used by tail calls, where the tail calls reuse the incoming stack 257 // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). 258 addPass(&DeadMachineInstructionElimID); 259 printAndVerify("After codegen DCE pass"); 260 261 // Allow targets to insert passes that improve instruction level parallelism, 262 // like if-conversion. Such passes will typically need dominator trees and 263 // loop info, just like LICM and CSE below. 264 if (addILPOpts()) 265 printAndVerify("After ILP optimizations"); 266 267 addPass(&MachineLICMID); 268 addPass(&MachineCSEID); 269 270 addPass(&MachineSinkingID); 271 printAndVerify("After Machine LICM, CSE and Sinking passes"); 272 273 addPass(&PeepholeOptimizerID); 274 printAndVerify("After codegen peephole optimization pass"); 275 } 276