1 //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Top-level implementation for the NVPTX target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "NVPTXTargetMachine.h" 15 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 16 #include "NVPTX.h" 17 #include "NVPTXAllocaHoisting.h" 18 #include "NVPTXLowerAggrCopies.h" 19 #include "llvm/Analysis/Passes.h" 20 #include "llvm/CodeGen/AsmPrinter.h" 21 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 22 #include "llvm/CodeGen/MachineModuleInfo.h" 23 #include "llvm/CodeGen/Passes.h" 24 #include "llvm/IR/DataLayout.h" 25 #include "llvm/IR/IRPrintingPasses.h" 26 #include "llvm/IR/Verifier.h" 27 #include "llvm/MC/MCAsmInfo.h" 28 #include "llvm/MC/MCInstrInfo.h" 29 #include "llvm/MC/MCStreamer.h" 30 #include "llvm/MC/MCSubtargetInfo.h" 31 #include "llvm/PassManager.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/FormattedStream.h" 35 #include "llvm/Support/TargetRegistry.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include "llvm/Target/TargetInstrInfo.h" 38 #include "llvm/Target/TargetLowering.h" 39 #include "llvm/Target/TargetLoweringObjectFile.h" 40 #include "llvm/Target/TargetMachine.h" 41 #include "llvm/Target/TargetOptions.h" 42 #include "llvm/Target/TargetRegisterInfo.h" 43 #include "llvm/Target/TargetSubtargetInfo.h" 44 #include "llvm/Transforms/Scalar.h" 45 46 using namespace llvm; 47 48 namespace llvm { 49 void initializeNVVMReflectPass(PassRegistry&); 50 void initializeGenericToNVVMPass(PassRegistry&); 51 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); 52 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); 53 } 54 55 extern "C" void LLVMInitializeNVPTXTarget() { 56 // Register the target. 57 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); 58 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); 59 60 // FIXME: This pass is really intended to be invoked during IR optimization, 61 // but it's very NVPTX-specific. 62 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); 63 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); 64 initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); 65 initializeNVPTXFavorNonGenericAddrSpacesPass( 66 *PassRegistry::getPassRegistry()); 67 } 68 69 static std::string computeDataLayout(const NVPTXSubtarget &ST) { 70 std::string Ret = "e"; 71 72 if (!ST.is64Bit()) 73 Ret += "-p:32:32"; 74 75 Ret += "-i64:64-v16:16-v32:32-n16:32:64"; 76 77 return Ret; 78 } 79 80 NVPTXTargetMachine::NVPTXTargetMachine( 81 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 82 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 83 CodeGenOpt::Level OL, bool is64bit) 84 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), 85 Subtarget(TT, CPU, FS, is64bit), DL(computeDataLayout(Subtarget)), 86 InstrInfo(*this), TLInfo(*this), TSInfo(&DL), 87 FrameLowering( 88 *this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ { 89 initAsmInfo(); 90 } 91 92 void NVPTXTargetMachine32::anchor() {} 93 94 NVPTXTargetMachine32::NVPTXTargetMachine32( 95 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 96 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 97 CodeGenOpt::Level OL) 98 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 99 100 void NVPTXTargetMachine64::anchor() {} 101 102 NVPTXTargetMachine64::NVPTXTargetMachine64( 103 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 104 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 105 CodeGenOpt::Level OL) 106 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 107 108 namespace { 109 class NVPTXPassConfig : public TargetPassConfig { 110 public: 111 NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) 112 : TargetPassConfig(TM, PM) {} 113 114 NVPTXTargetMachine &getNVPTXTargetMachine() const { 115 return getTM<NVPTXTargetMachine>(); 116 } 117 118 void addIRPasses() override; 119 bool addInstSelector() override; 120 bool addPreRegAlloc() override; 121 bool addPostRegAlloc() override; 122 123 FunctionPass *createTargetRegisterAllocator(bool) override; 124 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 125 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 126 }; 127 } // end anonymous namespace 128 129 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 130 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); 131 return PassConfig; 132 } 133 134 void NVPTXPassConfig::addIRPasses() { 135 // The following passes are known to not play well with virtual regs hanging 136 // around after register allocation (which in our case, is *all* registers). 137 // We explicitly disable them here. We do, however, need some functionality 138 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the 139 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 140 disablePass(&PrologEpilogCodeInserterID); 141 disablePass(&MachineCopyPropagationID); 142 disablePass(&BranchFolderPassID); 143 disablePass(&TailDuplicateID); 144 145 addPass(createNVPTXImageOptimizerPass()); 146 TargetPassConfig::addIRPasses(); 147 addPass(createNVPTXAssignValidGlobalNamesPass()); 148 addPass(createGenericToNVVMPass()); 149 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); 150 addPass(createSeparateConstOffsetFromGEPPass()); 151 // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used 152 // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates 153 // significantly better code than EarlyCSE for some of our benchmarks. 154 if (getOptLevel() == CodeGenOpt::Aggressive) 155 addPass(createGVNPass()); 156 else 157 addPass(createEarlyCSEPass()); 158 // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave 159 // some dead code. We could remove dead code in an ad-hoc manner, but that 160 // requires manual work and might be error-prone. 161 // 162 // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, 163 // and leave them unused. 164 // 165 // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the 166 // old index and some of its intermediate results may become unused. 167 addPass(createDeadCodeEliminationPass()); 168 } 169 170 bool NVPTXPassConfig::addInstSelector() { 171 const NVPTXSubtarget &ST = 172 getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); 173 174 addPass(createLowerAggrCopies()); 175 addPass(createAllocaHoisting()); 176 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); 177 178 if (!ST.hasImageHandles()) 179 addPass(createNVPTXReplaceImageHandlesPass()); 180 181 return false; 182 } 183 184 bool NVPTXPassConfig::addPreRegAlloc() { return false; } 185 bool NVPTXPassConfig::addPostRegAlloc() { 186 addPass(createNVPTXPrologEpilogPass()); 187 return false; 188 } 189 190 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 191 return nullptr; // No reg alloc 192 } 193 194 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 195 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 196 addPass(&PHIEliminationID); 197 addPass(&TwoAddressInstructionPassID); 198 } 199 200 void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 201 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 202 203 addPass(&ProcessImplicitDefsID); 204 addPass(&LiveVariablesID); 205 addPass(&MachineLoopInfoID); 206 addPass(&PHIEliminationID); 207 208 addPass(&TwoAddressInstructionPassID); 209 addPass(&RegisterCoalescerID); 210 211 // PreRA instruction scheduling. 212 if (addPass(&MachineSchedulerID)) 213 printAndVerify("After Machine Scheduling"); 214 215 216 addPass(&StackSlotColoringID); 217 218 // FIXME: Needs physical registers 219 //addPass(&PostRAMachineLICMID); 220 221 printAndVerify("After StackSlotColoring"); 222 } 223