1ae556d3eSJustin Holewinski //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
2ae556d3eSJustin Holewinski //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ae556d3eSJustin Holewinski //
7ae556d3eSJustin Holewinski //===----------------------------------------------------------------------===//
8ae556d3eSJustin Holewinski //
9ae556d3eSJustin Holewinski // Top-level implementation for the NVPTX target.
10ae556d3eSJustin Holewinski //
11ae556d3eSJustin Holewinski //===----------------------------------------------------------------------===//
12ae556d3eSJustin Holewinski 
136bda14b3SChandler Carruth #include "NVPTXTargetMachine.h"
14ed0881b2SChandler Carruth #include "NVPTX.h"
15ae556d3eSJustin Holewinski #include "NVPTXAllocaHoisting.h"
167aa3cad4SWilliam S. Moses #include "NVPTXAtomicLower.h"
17ed0881b2SChandler Carruth #include "NVPTXLowerAggrCopies.h"
18a2719329SAditya Nandakumar #include "NVPTXTargetObjectFile.h"
1993dcdc47SChandler Carruth #include "NVPTXTargetTransformInfo.h"
20e8f83befSRichard Trieu #include "TargetInfo/NVPTXTargetInfo.h"
21c9f1f6b8SEugene Zelenko #include "llvm/ADT/STLExtras.h"
22c9f1f6b8SEugene Zelenko #include "llvm/ADT/Triple.h"
23c9f1f6b8SEugene Zelenko #include "llvm/Analysis/TargetTransformInfo.h"
24ae556d3eSJustin Holewinski #include "llvm/CodeGen/Passes.h"
2531d19d43SMatthias Braun #include "llvm/CodeGen/TargetPassConfig.h"
26bf225939SMichael Liao #include "llvm/IR/IntrinsicsNVPTX.h"
2730d69c2eSChandler Carruth #include "llvm/IR/LegacyPassManager.h"
2889b57061SReid Kleckner #include "llvm/MC/TargetRegistry.h"
29c9f1f6b8SEugene Zelenko #include "llvm/Pass.h"
309ccf13c3SArthur Eubanks #include "llvm/Passes/PassBuilder.h"
31ed0881b2SChandler Carruth #include "llvm/Support/CommandLine.h"
32ae556d3eSJustin Holewinski #include "llvm/Target/TargetMachine.h"
33ae556d3eSJustin Holewinski #include "llvm/Target/TargetOptions.h"
3481598117SStanislav Mekhanoshin #include "llvm/Transforms/IPO/PassManagerBuilder.h"
35ae556d3eSJustin Holewinski #include "llvm/Transforms/Scalar.h"
3689c45a16SChandler Carruth #include "llvm/Transforms/Scalar/GVN.h"
37cd564c6bSJustin Lebar #include "llvm/Transforms/Vectorize.h"
38c9f1f6b8SEugene Zelenko #include <cassert>
39c9f1f6b8SEugene Zelenko #include <string>
40ae556d3eSJustin Holewinski 
41ae556d3eSJustin Holewinski using namespace llvm;
42ae556d3eSJustin Holewinski 
43cd564c6bSJustin Lebar // LSV is still relatively new; this switch lets us turn it off in case we
44cd564c6bSJustin Lebar // encounter (or suspect) a bug.
45cd564c6bSJustin Lebar static cl::opt<bool>
46cd564c6bSJustin Lebar     DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
47cd564c6bSJustin Lebar                                cl::desc("Disable load/store vectorizer"),
48cd564c6bSJustin Lebar                                cl::init(false), cl::Hidden);
49cd564c6bSJustin Lebar 
508f9f0269STim Shen // TODO: Remove this flag when we are confident with no regressions.
518f9f0269STim Shen static cl::opt<bool> DisableRequireStructuredCFG(
528f9f0269STim Shen     "disable-nvptx-require-structured-cfg",
538f9f0269STim Shen     cl::desc("Transitional flag to turn off NVPTX's requirement on preserving "
548f9f0269STim Shen              "structured CFG. The requirement should be disabled only when "
558f9f0269STim Shen              "unexpected regressions happen."),
568f9f0269STim Shen     cl::init(false), cl::Hidden);
578f9f0269STim Shen 
582f348ea1SArtem Belevich static cl::opt<bool> UseShortPointersOpt(
592f348ea1SArtem Belevich     "nvptx-short-ptr",
602f348ea1SArtem Belevich     cl::desc(
612f348ea1SArtem Belevich         "Use 32-bit pointers for accessing const/local/shared address spaces."),
622f348ea1SArtem Belevich     cl::init(false), cl::Hidden);
632f348ea1SArtem Belevich 
64b94bd05bSJustin Holewinski namespace llvm {
65c9f1f6b8SEugene Zelenko 
6649e9a812SArtem Belevich void initializeNVVMIntrRangePass(PassRegistry&);
67b94bd05bSJustin Holewinski void initializeNVVMReflectPass(PassRegistry&);
6801f89f04SJustin Holewinski void initializeGenericToNVVMPass(PassRegistry&);
69414c0964SBenjamin Kramer void initializeNVPTXAllocaHoistingPass(PassRegistry &);
707aa3cad4SWilliam S. Moses void initializeNVPTXAtomicLowerPass(PassRegistry &);
71264cd467SEli Bendersky void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
72f14af162SEli Bendersky void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
737e9c9a65SArtem Belevich void initializeNVPTXLowerArgsPass(PassRegistry &);
74cd3afea4SJingyue Wu void initializeNVPTXLowerAllocaPass(PassRegistry &);
7549fac56eSJustin Lebar void initializeNVPTXProxyRegErasurePass(PassRegistry &);
76c9f1f6b8SEugene Zelenko 
77c9f1f6b8SEugene Zelenko } // end namespace llvm
78b94bd05bSJustin Holewinski 
LLVMInitializeNVPTXTarget()790dbcb363STom Stellard extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
80ae556d3eSJustin Holewinski   // Register the target.
81f42454b9SMehdi Amini   RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32());
82f42454b9SMehdi Amini   RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64());
83ae556d3eSJustin Holewinski 
84b94bd05bSJustin Holewinski   // FIXME: This pass is really intended to be invoked during IR optimization,
85b94bd05bSJustin Holewinski   // but it's very NVPTX-specific.
86f14af162SEli Bendersky   PassRegistry &PR = *PassRegistry::getPassRegistry();
87f14af162SEli Bendersky   initializeNVVMReflectPass(PR);
8849e9a812SArtem Belevich   initializeNVVMIntrRangePass(PR);
89f14af162SEli Bendersky   initializeGenericToNVVMPass(PR);
90f14af162SEli Bendersky   initializeNVPTXAllocaHoistingPass(PR);
91f14af162SEli Bendersky   initializeNVPTXAssignValidGlobalNamesPass(PR);
927aa3cad4SWilliam S. Moses   initializeNVPTXAtomicLowerPass(PR);
937e9c9a65SArtem Belevich   initializeNVPTXLowerArgsPass(PR);
94f14af162SEli Bendersky   initializeNVPTXLowerAllocaPass(PR);
95f14af162SEli Bendersky   initializeNVPTXLowerAggrCopiesPass(PR);
9649fac56eSJustin Lebar   initializeNVPTXProxyRegErasurePass(PR);
97ae556d3eSJustin Holewinski }
98ae556d3eSJustin Holewinski 
/// Build the data-layout string for an NVPTX target.
///
/// \param is64Bit          false selects the 32-bit layout ("-p:32:32").
/// \param UseShortPointers only consulted when \p is64Bit is true; adds
///                         32-bit pointer specs for address spaces 3, 4 and 5.
/// \returns "e", followed by the optional pointer component, followed by the
///          fixed integer/vector/native-width component.
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
  // Pointer component; stays empty for a 64-bit target with default pointers.
  const char *PointerSpec = "";
  if (!is64Bit)
    PointerSpec = "-p:32:32";
  else if (UseShortPointers)
    PointerSpec = "-p3:32:32-p4:32:32-p5:32:32";

  std::string Layout("e");
  Layout.append(PointerSpec);
  Layout.append("-i64:64-i128:128-v16:16-v32:32-n16:32:64");
  return Layout;
}
1118b770651SEric Christopher 
NVPTXTargetMachine(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,Optional<Reloc::Model> RM,Optional<CodeModel::Model> CM,CodeGenOpt::Level OL,bool is64bit)1123e5de88dSDaniel Sanders NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
113a1869461SEric Christopher                                        StringRef CPU, StringRef FS,
114a1869461SEric Christopher                                        const TargetOptions &Options,
1158c34dd82SRafael Espindola                                        Optional<Reloc::Model> RM,
11679e238afSRafael Espindola                                        Optional<CodeModel::Model> CM,
1170497ab14SJustin Holewinski                                        CodeGenOpt::Level OL, bool is64bit)
1188c34dd82SRafael Espindola     // The pic relocation model is used regardless of what the client has
1198c34dd82SRafael Espindola     // specified, as it is the only relocation model currently supported.
1202f348ea1SArtem Belevich     : LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT,
1212f348ea1SArtem Belevich                         CPU, FS, Options, Reloc::PIC_,
122ca29c271SDavid Green                         getEffectiveCodeModel(CM, CodeModel::Small), OL),
1232f348ea1SArtem Belevich       is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
1240eaee545SJonas Devlieghere       TLOF(std::make_unique<NVPTXTargetObjectFile>()),
125adcd0268SBenjamin Kramer       Subtarget(TT, std::string(CPU), std::string(FS), *this) {
1263e5de88dSDaniel Sanders   if (TT.getOS() == Triple::NVCL)
1276aad8b18SEric Christopher     drvInterface = NVPTX::NVCL;
1286aad8b18SEric Christopher   else
1296aad8b18SEric Christopher     drvInterface = NVPTX::CUDA;
1308f9f0269STim Shen   if (!DisableRequireStructuredCFG)
1318f9f0269STim Shen     setRequiresStructuredCFG(true);
132227144c2SRafael Espindola   initAsmInfo();
133227144c2SRafael Espindola }
134ae556d3eSJustin Holewinski 
135c9f1f6b8SEugene Zelenko NVPTXTargetMachine::~NVPTXTargetMachine() = default;
136357600eaSReid Kleckner 
anchor()137ae556d3eSJustin Holewinski void NVPTXTargetMachine32::anchor() {}
138ae556d3eSJustin Holewinski 
NVPTXTargetMachine32(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,Optional<Reloc::Model> RM,Optional<CodeModel::Model> CM,CodeGenOpt::Level OL,bool JIT)1393e5de88dSDaniel Sanders NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
1403e5de88dSDaniel Sanders                                            StringRef CPU, StringRef FS,
1413e5de88dSDaniel Sanders                                            const TargetOptions &Options,
1428c34dd82SRafael Espindola                                            Optional<Reloc::Model> RM,
14379e238afSRafael Espindola                                            Optional<CodeModel::Model> CM,
14479e238afSRafael Espindola                                            CodeGenOpt::Level OL, bool JIT)
1450497ab14SJustin Holewinski     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
146ae556d3eSJustin Holewinski 
anchor()147ae556d3eSJustin Holewinski void NVPTXTargetMachine64::anchor() {}
148ae556d3eSJustin Holewinski 
NVPTXTargetMachine64(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,Optional<Reloc::Model> RM,Optional<CodeModel::Model> CM,CodeGenOpt::Level OL,bool JIT)1493e5de88dSDaniel Sanders NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
1503e5de88dSDaniel Sanders                                            StringRef CPU, StringRef FS,
1513e5de88dSDaniel Sanders                                            const TargetOptions &Options,
1528c34dd82SRafael Espindola                                            Optional<Reloc::Model> RM,
15379e238afSRafael Espindola                                            Optional<CodeModel::Model> CM,
15479e238afSRafael Espindola                                            CodeGenOpt::Level OL, bool JIT)
1550497ab14SJustin Holewinski     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
156ae556d3eSJustin Holewinski 
157d78bb468SBenjamin Kramer namespace {
158c9f1f6b8SEugene Zelenko 
159ae556d3eSJustin Holewinski class NVPTXPassConfig : public TargetPassConfig {
160ae556d3eSJustin Holewinski public:
NVPTXPassConfig(NVPTXTargetMachine & TM,PassManagerBase & PM)1615e394c3dSMatthias Braun   NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM)
162ae556d3eSJustin Holewinski       : TargetPassConfig(TM, PM) {}
163ae556d3eSJustin Holewinski 
getNVPTXTargetMachine() const164ae556d3eSJustin Holewinski   NVPTXTargetMachine &getNVPTXTargetMachine() const {
165ae556d3eSJustin Holewinski     return getTM<NVPTXTargetMachine>();
166ae556d3eSJustin Holewinski   }
167ae556d3eSJustin Holewinski 
1682865c986SCraig Topper   void addIRPasses() override;
1692865c986SCraig Topper   bool addInstSelector() override;
17049fac56eSJustin Lebar   void addPreRegAlloc() override;
1717e37a5f5SMatthias Braun   void addPostRegAlloc() override;
1726dca8398SJustin Holewinski   void addMachineSSAOptimization() override;
173dbb3b2f4SJustin Holewinski 
1742865c986SCraig Topper   FunctionPass *createTargetRegisterAllocator(bool) override;
175cf55a657SMatt Arsenault   void addFastRegAlloc() override;
176cf55a657SMatt Arsenault   void addOptimizedRegAlloc() override;
177cf55a657SMatt Arsenault 
addRegAssignAndRewriteFast()178c9122ddeSMatt Arsenault   bool addRegAssignAndRewriteFast() override {
179cf55a657SMatt Arsenault     llvm_unreachable("should not be used");
180cf55a657SMatt Arsenault   }
181cf55a657SMatt Arsenault 
addRegAssignAndRewriteOptimized()182c9122ddeSMatt Arsenault   bool addRegAssignAndRewriteOptimized() override {
183cf55a657SMatt Arsenault     llvm_unreachable("should not be used");
184cf55a657SMatt Arsenault   }
1856a3fdecaSJingyue Wu 
1866a3fdecaSJingyue Wu private:
187f650441bSJingyue Wu   // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
188f650441bSJingyue Wu   // function is only called in opt mode.
1896a3fdecaSJingyue Wu   void addEarlyCSEOrGVNPass();
190f650441bSJingyue Wu 
191f650441bSJingyue Wu   // Add passes that propagate special memory spaces.
1921375560bSJingyue Wu   void addAddressSpaceInferencePasses();
193f650441bSJingyue Wu 
194f650441bSJingyue Wu   // Add passes that perform straight-line scalar optimizations.
195f650441bSJingyue Wu   void addStraightLineScalarOptimizationPasses();
196ae556d3eSJustin Holewinski };
197c9f1f6b8SEugene Zelenko 
198d78bb468SBenjamin Kramer } // end anonymous namespace
199ae556d3eSJustin Holewinski 
createPassConfig(PassManagerBase & PM)200ae556d3eSJustin Holewinski TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
2015e394c3dSMatthias Braun   return new NVPTXPassConfig(*this, PM);
202ae556d3eSJustin Holewinski }
203ae556d3eSJustin Holewinski 
adjustPassManager(PassManagerBuilder & Builder)20481598117SStanislav Mekhanoshin void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
20581598117SStanislav Mekhanoshin   Builder.addExtension(
20681598117SStanislav Mekhanoshin     PassManagerBuilder::EP_EarlyAsPossible,
20781598117SStanislav Mekhanoshin     [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
2080a11b636SArtem Belevich       PM.add(createNVVMReflectPass(Subtarget.getSmVersion()));
20949e9a812SArtem Belevich       PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
21081598117SStanislav Mekhanoshin     });
2117cdbce59SJustin Lebar }
2127cdbce59SJustin Lebar 
registerPassBuilderCallbacks(PassBuilder & PB)21334a8a437SArthur Eubanks void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
2149ccf13c3SArthur Eubanks   PB.registerPipelineParsingCallback(
2159ccf13c3SArthur Eubanks       [](StringRef PassName, FunctionPassManager &PM,
2169ccf13c3SArthur Eubanks          ArrayRef<PassBuilder::PipelineElement>) {
2179ccf13c3SArthur Eubanks         if (PassName == "nvvm-reflect") {
2189ccf13c3SArthur Eubanks           PM.addPass(NVVMReflectPass());
2199ccf13c3SArthur Eubanks           return true;
2209ccf13c3SArthur Eubanks         }
2219ccf13c3SArthur Eubanks         if (PassName == "nvvm-intr-range") {
2229ccf13c3SArthur Eubanks           PM.addPass(NVVMIntrRangePass());
2239ccf13c3SArthur Eubanks           return true;
2249ccf13c3SArthur Eubanks         }
2259ccf13c3SArthur Eubanks         return false;
2269ccf13c3SArthur Eubanks       });
2279ccf13c3SArthur Eubanks 
228e84a4650SArthur Eubanks   PB.registerPipelineStartEPCallback(
2297a797b29STarindu Jayatilaka       [this](ModulePassManager &PM, OptimizationLevel Level) {
23034a8a437SArthur Eubanks         FunctionPassManager FPM;
231e84a4650SArthur Eubanks         FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
232e84a4650SArthur Eubanks         // FIXME: NVVMIntrRangePass is causing numerical discrepancies,
233e84a4650SArthur Eubanks         // investigate and re-enable.
234526c0955SArthur Eubanks         // FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion()));
235e84a4650SArthur Eubanks         PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
236e84a4650SArthur Eubanks       });
2379ccf13c3SArthur Eubanks }
2389ccf13c3SArthur Eubanks 
23926d11ca4SSanjoy Das TargetTransformInfo
getTargetTransformInfo(const Function & F) const240c4b1a63aSJameson Nash NVPTXTargetMachine::getTargetTransformInfo(const Function &F) const {
2415010ebf1SMehdi Amini   return TargetTransformInfo(NVPTXTTIImpl(this, F));
2420c981bd7SJingyue Wu }
2430c981bd7SJingyue Wu 
244bf225939SMichael Liao std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value * V) const245bf225939SMichael Liao NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
246bf225939SMichael Liao   if (auto *II = dyn_cast<IntrinsicInst>(V)) {
247bf225939SMichael Liao     switch (II->getIntrinsicID()) {
248bf225939SMichael Liao     case Intrinsic::nvvm_isspacep_const:
249bf225939SMichael Liao       return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_CONST);
250bf225939SMichael Liao     case Intrinsic::nvvm_isspacep_global:
251bf225939SMichael Liao       return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_GLOBAL);
252bf225939SMichael Liao     case Intrinsic::nvvm_isspacep_local:
253bf225939SMichael Liao       return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
254bf225939SMichael Liao     case Intrinsic::nvvm_isspacep_shared:
255bf225939SMichael Liao       return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
256bf225939SMichael Liao     default:
257bf225939SMichael Liao       break;
258bf225939SMichael Liao     }
259bf225939SMichael Liao   }
260bf225939SMichael Liao   return std::make_pair(nullptr, -1);
261bf225939SMichael Liao }
262bf225939SMichael Liao 
addEarlyCSEOrGVNPass()2636a3fdecaSJingyue Wu void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
2646a3fdecaSJingyue Wu   if (getOptLevel() == CodeGenOpt::Aggressive)
2656a3fdecaSJingyue Wu     addPass(createGVNPass());
2666a3fdecaSJingyue Wu   else
2676a3fdecaSJingyue Wu     addPass(createEarlyCSEPass());
2686a3fdecaSJingyue Wu }
2696a3fdecaSJingyue Wu 
addAddressSpaceInferencePasses()2701375560bSJingyue Wu void NVPTXPassConfig::addAddressSpaceInferencePasses() {
2717e9c9a65SArtem Belevich   // NVPTXLowerArgs emits alloca for byval parameters which can often
272cd3afea4SJingyue Wu   // be eliminated by SROA.
2732e4d1dd0SJingyue Wu   addPass(createSROAPass());
274cd3afea4SJingyue Wu   addPass(createNVPTXLowerAllocaPass());
275850657a4SMatt Arsenault   addPass(createInferAddressSpacesPass());
2767aa3cad4SWilliam S. Moses   addPass(createNVPTXAtomicLowerPass());
2771375560bSJingyue Wu }
2786a3fdecaSJingyue Wu 
addStraightLineScalarOptimizationPasses()279f650441bSJingyue Wu void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
280a108a65dSEli Bendersky   addPass(createSeparateConstOffsetFromGEPPass());
281e7981ceeSJingyue Wu   addPass(createSpeculativeExecutionPass());
2823286ec14SJingyue Wu   // ReassociateGEPs exposes more opportunites for SLSR. See
2833286ec14SJingyue Wu   // the example in reassociate-geps-and-slsr.ll.
2843286ec14SJingyue Wu   addPass(createStraightLineStrengthReducePass());
2853286ec14SJingyue Wu   // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
2863286ec14SJingyue Wu   // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
2873286ec14SJingyue Wu   // for some of our benchmarks.
2886a3fdecaSJingyue Wu   addEarlyCSEOrGVNPass();
28972fca6c8SJingyue Wu   // Run NaryReassociate after EarlyCSE/GVN to be more effective.
29072fca6c8SJingyue Wu   addPass(createNaryReassociatePass());
291c2a01469SJingyue Wu   // NaryReassociate on GEPs creates redundant common expressions, so run
292c2a01469SJingyue Wu   // EarlyCSE after it.
293c2a01469SJingyue Wu   addPass(createEarlyCSEPass());
294f650441bSJingyue Wu }
295f650441bSJingyue Wu 
addIRPasses()296f650441bSJingyue Wu void NVPTXPassConfig::addIRPasses() {
297f650441bSJingyue Wu   // The following passes are known to not play well with virtual regs hanging
298f650441bSJingyue Wu   // around after register allocation (which in our case, is *all* registers).
299f650441bSJingyue Wu   // We explicitly disable them here.  We do, however, need some functionality
300f650441bSJingyue Wu   // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
301f650441bSJingyue Wu   // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
302f650441bSJingyue Wu   disablePass(&PrologEpilogCodeInserterID);
303f650441bSJingyue Wu   disablePass(&MachineCopyPropagationID);
304f650441bSJingyue Wu   disablePass(&TailDuplicateID);
305ad154c83SDerek Schuff   disablePass(&StackMapLivenessID);
306ad154c83SDerek Schuff   disablePass(&LiveDebugValuesID);
3077ab1b32bSJun Bum Lim   disablePass(&PostRAMachineSinkingID);
308ad154c83SDerek Schuff   disablePass(&PostRASchedulerID);
309ad154c83SDerek Schuff   disablePass(&FuncletLayoutID);
310fe71ec77SSanjoy Das   disablePass(&PatchableFunctionID);
3117ab1b32bSJun Bum Lim   disablePass(&ShrinkWrapID);
312f650441bSJingyue Wu 
3137cdbce59SJustin Lebar   // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
3147cdbce59SJustin Lebar   // it here does nothing.  But since we need it for correctness when lowering
3157cdbce59SJustin Lebar   // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
3167cdbce59SJustin Lebar   // call addEarlyAsPossiblePasses.
3170a11b636SArtem Belevich   const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
3180a11b636SArtem Belevich   addPass(createNVVMReflectPass(ST.getSmVersion()));
3197cdbce59SJustin Lebar 
320f650441bSJingyue Wu   if (getOptLevel() != CodeGenOpt::None)
321f650441bSJingyue Wu     addPass(createNVPTXImageOptimizerPass());
322f650441bSJingyue Wu   addPass(createNVPTXAssignValidGlobalNamesPass());
323f650441bSJingyue Wu   addPass(createGenericToNVVMPass());
324f650441bSJingyue Wu 
3257e9c9a65SArtem Belevich   // NVPTXLowerArgs is required for correctness and should be run right
326c1b9d47bSJingyue Wu   // before the address space inference passes.
3277e9c9a65SArtem Belevich   addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
328f650441bSJingyue Wu   if (getOptLevel() != CodeGenOpt::None) {
3291375560bSJingyue Wu     addAddressSpaceInferencePasses();
330f650441bSJingyue Wu     addStraightLineScalarOptimizationPasses();
331f650441bSJingyue Wu   }
3326a3fdecaSJingyue Wu 
333*ecf5b780SShilei Tian   addPass(createAtomicExpandPass());
334*ecf5b780SShilei Tian 
3356a3fdecaSJingyue Wu   // === LSR and other generic IR passes ===
3366a3fdecaSJingyue Wu   TargetPassConfig::addIRPasses();
3376a3fdecaSJingyue Wu   // EarlyCSE is not always strong enough to clean up what LSR produces. For
3386a3fdecaSJingyue Wu   // example, GVN can combine
3396a3fdecaSJingyue Wu   //
3406a3fdecaSJingyue Wu   //   %0 = add %a, %b
3416a3fdecaSJingyue Wu   //   %1 = add %b, %a
3426a3fdecaSJingyue Wu   //
3436a3fdecaSJingyue Wu   // and
3446a3fdecaSJingyue Wu   //
3456a3fdecaSJingyue Wu   //   %0 = shl nsw %a, 2
3466a3fdecaSJingyue Wu   //   %1 = shl %a, 2
3476a3fdecaSJingyue Wu   //
3486a3fdecaSJingyue Wu   // but EarlyCSE can do neither of them.
349019ab61eSFrederic Bastien   if (getOptLevel() != CodeGenOpt::None) {
3506a3fdecaSJingyue Wu     addEarlyCSEOrGVNPass();
351019ab61eSFrederic Bastien     if (!DisableLoadStoreVectorizer)
352019ab61eSFrederic Bastien       addPass(createLoadStoreVectorizerPass());
353b6b7fe60SArtem Belevich     addPass(createSROAPass());
354019ab61eSFrederic Bastien   }
35501f89f04SJustin Holewinski }
35601f89f04SJustin Holewinski 
addInstSelector()357ae556d3eSJustin Holewinski bool NVPTXPassConfig::addInstSelector() {
3585c3dffc4SEric Christopher   const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
35930d56a7bSJustin Holewinski 
360bbd38dd9SBob Wilson   addPass(createLowerAggrCopies());
361bbd38dd9SBob Wilson   addPass(createAllocaHoisting());
362bbd38dd9SBob Wilson   addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
36330d56a7bSJustin Holewinski 
36430d56a7bSJustin Holewinski   if (!ST.hasImageHandles())
36530d56a7bSJustin Holewinski     addPass(createNVPTXReplaceImageHandlesPass());
36630d56a7bSJustin Holewinski 
367ae556d3eSJustin Holewinski   return false;
368ae556d3eSJustin Holewinski }
369ae556d3eSJustin Holewinski 
addPreRegAlloc()37049fac56eSJustin Lebar void NVPTXPassConfig::addPreRegAlloc() {
37149fac56eSJustin Lebar   // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
37249fac56eSJustin Lebar   addPass(createNVPTXProxyRegErasurePass());
37349fac56eSJustin Lebar }
37449fac56eSJustin Lebar 
addPostRegAlloc()3757e37a5f5SMatthias Braun void NVPTXPassConfig::addPostRegAlloc() {
376012248b0SJay Foad   addPass(createNVPTXPrologEpilogPass());
377c1b9d47bSJingyue Wu   if (getOptLevel() != CodeGenOpt::None) {
37877b5b385SJingyue Wu     // NVPTXPrologEpilogPass calculates frame object offset and replace frame
37977b5b385SJingyue Wu     // index with VRFrame register. NVPTXPeephole need to be run after that and
38077b5b385SJingyue Wu     // will replace VRFrame with VRFrameLocal when possible.
38177b5b385SJingyue Wu     addPass(createNVPTXPeephole());
382dbb3b2f4SJustin Holewinski   }
383c1b9d47bSJingyue Wu }
384dbb3b2f4SJustin Holewinski 
createTargetRegisterAllocator(bool)385fae7ff12SBenjamin Kramer FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
386062a2baeSCraig Topper   return nullptr; // No reg alloc
387fae7ff12SBenjamin Kramer }
388fae7ff12SBenjamin Kramer 
addFastRegAlloc()389cf55a657SMatt Arsenault void NVPTXPassConfig::addFastRegAlloc() {
390a51418c1SJustin Holewinski   addPass(&PHIEliminationID);
391a51418c1SJustin Holewinski   addPass(&TwoAddressInstructionPassID);
392dbb3b2f4SJustin Holewinski }
393dbb3b2f4SJustin Holewinski 
addOptimizedRegAlloc()394cf55a657SMatt Arsenault void NVPTXPassConfig::addOptimizedRegAlloc() {
395a51418c1SJustin Holewinski   addPass(&ProcessImplicitDefsID);
396a51418c1SJustin Holewinski   addPass(&LiveVariablesID);
397a51418c1SJustin Holewinski   addPass(&MachineLoopInfoID);
398a51418c1SJustin Holewinski   addPass(&PHIEliminationID);
399a51418c1SJustin Holewinski 
400a51418c1SJustin Holewinski   addPass(&TwoAddressInstructionPassID);
401a51418c1SJustin Holewinski   addPass(&RegisterCoalescerID);
402a51418c1SJustin Holewinski 
403a51418c1SJustin Holewinski   // PreRA instruction scheduling.
404a51418c1SJustin Holewinski   if (addPass(&MachineSchedulerID))
405a51418c1SJustin Holewinski     printAndVerify("After Machine Scheduling");
406a51418c1SJustin Holewinski 
407a51418c1SJustin Holewinski 
408a51418c1SJustin Holewinski   addPass(&StackSlotColoringID);
409a51418c1SJustin Holewinski 
410a51418c1SJustin Holewinski   // FIXME: Needs physical registers
4114a7c8e7aSMatthias Braun   //addPass(&MachineLICMID);
412a51418c1SJustin Holewinski 
413a51418c1SJustin Holewinski   printAndVerify("After StackSlotColoring");
414dbb3b2f4SJustin Holewinski }
4156dca8398SJustin Holewinski 
addMachineSSAOptimization()4166dca8398SJustin Holewinski void NVPTXPassConfig::addMachineSSAOptimization() {
4176dca8398SJustin Holewinski   // Pre-ra tail duplication.
4186dca8398SJustin Holewinski   if (addPass(&EarlyTailDuplicateID))
4196dca8398SJustin Holewinski     printAndVerify("After Pre-RegAlloc TailDuplicate");
4206dca8398SJustin Holewinski 
4216dca8398SJustin Holewinski   // Optimize PHIs before DCE: removing dead PHI cycles may make more
4226dca8398SJustin Holewinski   // instructions dead.
4236dca8398SJustin Holewinski   addPass(&OptimizePHIsID);
4246dca8398SJustin Holewinski 
4256dca8398SJustin Holewinski   // This pass merges large allocas. StackSlotColoring is a different pass
4266dca8398SJustin Holewinski   // which merges spill slots.
4276dca8398SJustin Holewinski   addPass(&StackColoringID);
4286dca8398SJustin Holewinski 
4296dca8398SJustin Holewinski   // If the target requests it, assign local variables to stack slots relative
4306dca8398SJustin Holewinski   // to one another and simplify frame index references where possible.
4316dca8398SJustin Holewinski   addPass(&LocalStackSlotAllocationID);
4326dca8398SJustin Holewinski 
4336dca8398SJustin Holewinski   // With optimization, dead code should already be eliminated. However
4346dca8398SJustin Holewinski   // there is one known exception: lowered code for arguments that are only
4356dca8398SJustin Holewinski   // used by tail calls, where the tail calls reuse the incoming stack
4366dca8398SJustin Holewinski   // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
4376dca8398SJustin Holewinski   addPass(&DeadMachineInstructionElimID);
4386dca8398SJustin Holewinski   printAndVerify("After codegen DCE pass");
4396dca8398SJustin Holewinski 
4406dca8398SJustin Holewinski   // Allow targets to insert passes that improve instruction level parallelism,
4416dca8398SJustin Holewinski   // like if-conversion. Such passes will typically need dominator trees and
4426dca8398SJustin Holewinski   // loop info, just like LICM and CSE below.
4436dca8398SJustin Holewinski   if (addILPOpts())
4446dca8398SJustin Holewinski     printAndVerify("After ILP optimizations");
4456dca8398SJustin Holewinski 
4464a7c8e7aSMatthias Braun   addPass(&EarlyMachineLICMID);
4476dca8398SJustin Holewinski   addPass(&MachineCSEID);
4486dca8398SJustin Holewinski 
4496dca8398SJustin Holewinski   addPass(&MachineSinkingID);
4506dca8398SJustin Holewinski   printAndVerify("After Machine LICM, CSE and Sinking passes");
4516dca8398SJustin Holewinski 
4526dca8398SJustin Holewinski   addPass(&PeepholeOptimizerID);
4536dca8398SJustin Holewinski   printAndVerify("After codegen peephole optimization pass");
4546dca8398SJustin Holewinski }
455