//===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Top-level implementation for the NVPTX target.
//
//===----------------------------------------------------------------------===//

136bda14b3SChandler Carruth #include "NVPTXTargetMachine.h"
14ed0881b2SChandler Carruth #include "NVPTX.h"
15ae556d3eSJustin Holewinski #include "NVPTXAllocaHoisting.h"
167aa3cad4SWilliam S. Moses #include "NVPTXAtomicLower.h"
17ed0881b2SChandler Carruth #include "NVPTXLowerAggrCopies.h"
18a2719329SAditya Nandakumar #include "NVPTXTargetObjectFile.h"
1993dcdc47SChandler Carruth #include "NVPTXTargetTransformInfo.h"
20e8f83befSRichard Trieu #include "TargetInfo/NVPTXTargetInfo.h"
21c9f1f6b8SEugene Zelenko #include "llvm/ADT/STLExtras.h"
22c9f1f6b8SEugene Zelenko #include "llvm/ADT/Triple.h"
23c9f1f6b8SEugene Zelenko #include "llvm/Analysis/TargetTransformInfo.h"
24ae556d3eSJustin Holewinski #include "llvm/CodeGen/Passes.h"
2531d19d43SMatthias Braun #include "llvm/CodeGen/TargetPassConfig.h"
26bf225939SMichael Liao #include "llvm/IR/IntrinsicsNVPTX.h"
2730d69c2eSChandler Carruth #include "llvm/IR/LegacyPassManager.h"
2889b57061SReid Kleckner #include "llvm/MC/TargetRegistry.h"
29c9f1f6b8SEugene Zelenko #include "llvm/Pass.h"
309ccf13c3SArthur Eubanks #include "llvm/Passes/PassBuilder.h"
31ed0881b2SChandler Carruth #include "llvm/Support/CommandLine.h"
32ae556d3eSJustin Holewinski #include "llvm/Target/TargetMachine.h"
33ae556d3eSJustin Holewinski #include "llvm/Target/TargetOptions.h"
3481598117SStanislav Mekhanoshin #include "llvm/Transforms/IPO/PassManagerBuilder.h"
35ae556d3eSJustin Holewinski #include "llvm/Transforms/Scalar.h"
3689c45a16SChandler Carruth #include "llvm/Transforms/Scalar/GVN.h"
37cd564c6bSJustin Lebar #include "llvm/Transforms/Vectorize.h"
38c9f1f6b8SEugene Zelenko #include <cassert>
39c9f1f6b8SEugene Zelenko #include <string>
40ae556d3eSJustin Holewinski
41ae556d3eSJustin Holewinski using namespace llvm;
42ae556d3eSJustin Holewinski
43cd564c6bSJustin Lebar // LSV is still relatively new; this switch lets us turn it off in case we
44cd564c6bSJustin Lebar // encounter (or suspect) a bug.
45cd564c6bSJustin Lebar static cl::opt<bool>
46cd564c6bSJustin Lebar DisableLoadStoreVectorizer("disable-nvptx-load-store-vectorizer",
47cd564c6bSJustin Lebar cl::desc("Disable load/store vectorizer"),
48cd564c6bSJustin Lebar cl::init(false), cl::Hidden);
49cd564c6bSJustin Lebar
508f9f0269STim Shen // TODO: Remove this flag when we are confident with no regressions.
518f9f0269STim Shen static cl::opt<bool> DisableRequireStructuredCFG(
528f9f0269STim Shen "disable-nvptx-require-structured-cfg",
538f9f0269STim Shen cl::desc("Transitional flag to turn off NVPTX's requirement on preserving "
548f9f0269STim Shen "structured CFG. The requirement should be disabled only when "
558f9f0269STim Shen "unexpected regressions happen."),
568f9f0269STim Shen cl::init(false), cl::Hidden);
578f9f0269STim Shen
582f348ea1SArtem Belevich static cl::opt<bool> UseShortPointersOpt(
592f348ea1SArtem Belevich "nvptx-short-ptr",
602f348ea1SArtem Belevich cl::desc(
612f348ea1SArtem Belevich "Use 32-bit pointers for accessing const/local/shared address spaces."),
622f348ea1SArtem Belevich cl::init(false), cl::Hidden);
632f348ea1SArtem Belevich
64b94bd05bSJustin Holewinski namespace llvm {
65c9f1f6b8SEugene Zelenko
6649e9a812SArtem Belevich void initializeNVVMIntrRangePass(PassRegistry&);
67b94bd05bSJustin Holewinski void initializeNVVMReflectPass(PassRegistry&);
6801f89f04SJustin Holewinski void initializeGenericToNVVMPass(PassRegistry&);
69414c0964SBenjamin Kramer void initializeNVPTXAllocaHoistingPass(PassRegistry &);
707aa3cad4SWilliam S. Moses void initializeNVPTXAtomicLowerPass(PassRegistry &);
71264cd467SEli Bendersky void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
72f14af162SEli Bendersky void initializeNVPTXLowerAggrCopiesPass(PassRegistry &);
737e9c9a65SArtem Belevich void initializeNVPTXLowerArgsPass(PassRegistry &);
74cd3afea4SJingyue Wu void initializeNVPTXLowerAllocaPass(PassRegistry &);
7549fac56eSJustin Lebar void initializeNVPTXProxyRegErasurePass(PassRegistry &);
76c9f1f6b8SEugene Zelenko
77c9f1f6b8SEugene Zelenko } // end namespace llvm
78b94bd05bSJustin Holewinski
LLVMInitializeNVPTXTarget()790dbcb363STom Stellard extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeNVPTXTarget() {
80ae556d3eSJustin Holewinski // Register the target.
81f42454b9SMehdi Amini RegisterTargetMachine<NVPTXTargetMachine32> X(getTheNVPTXTarget32());
82f42454b9SMehdi Amini RegisterTargetMachine<NVPTXTargetMachine64> Y(getTheNVPTXTarget64());
83ae556d3eSJustin Holewinski
84b94bd05bSJustin Holewinski // FIXME: This pass is really intended to be invoked during IR optimization,
85b94bd05bSJustin Holewinski // but it's very NVPTX-specific.
86f14af162SEli Bendersky PassRegistry &PR = *PassRegistry::getPassRegistry();
87f14af162SEli Bendersky initializeNVVMReflectPass(PR);
8849e9a812SArtem Belevich initializeNVVMIntrRangePass(PR);
89f14af162SEli Bendersky initializeGenericToNVVMPass(PR);
90f14af162SEli Bendersky initializeNVPTXAllocaHoistingPass(PR);
91f14af162SEli Bendersky initializeNVPTXAssignValidGlobalNamesPass(PR);
927aa3cad4SWilliam S. Moses initializeNVPTXAtomicLowerPass(PR);
937e9c9a65SArtem Belevich initializeNVPTXLowerArgsPass(PR);
94f14af162SEli Bendersky initializeNVPTXLowerAllocaPass(PR);
95f14af162SEli Bendersky initializeNVPTXLowerAggrCopiesPass(PR);
9649fac56eSJustin Lebar initializeNVPTXProxyRegErasurePass(PR);
97ae556d3eSJustin Holewinski }
98ae556d3eSJustin Holewinski
// Builds the data layout string handed to the LLVMTargetMachine base class.
//
// The result always starts with "e" and always ends with the shared
// "-i64:64-i128:128-v16:16-v32:32-n16:32:64" suffix. In between:
//   - 32-bit targets add "-p:32:32" (UseShortPointers is ignored);
//   - 64-bit targets add "-p3:32:32-p4:32:32-p5:32:32" when UseShortPointers
//     is set, and nothing otherwise.
static std::string computeDataLayout(bool is64Bit, bool UseShortPointers) {
  std::string Ret = "e";

  if (!is64Bit)
    Ret += "-p:32:32";
  else if (UseShortPointers)
    Ret += "-p3:32:32-p4:32:32-p5:32:32";

  Ret += "-i64:64-i128:128-v16:16-v32:32-n16:32:64";

  return Ret;
}

NVPTXTargetMachine(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,Optional<Reloc::Model> RM,Optional<CodeModel::Model> CM,CodeGenOpt::Level OL,bool is64bit)1123e5de88dSDaniel Sanders NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, const Triple &TT,
113a1869461SEric Christopher StringRef CPU, StringRef FS,
114a1869461SEric Christopher const TargetOptions &Options,
1158c34dd82SRafael Espindola Optional<Reloc::Model> RM,
11679e238afSRafael Espindola Optional<CodeModel::Model> CM,
1170497ab14SJustin Holewinski CodeGenOpt::Level OL, bool is64bit)
1188c34dd82SRafael Espindola // The pic relocation model is used regardless of what the client has
1198c34dd82SRafael Espindola // specified, as it is the only relocation model currently supported.
1202f348ea1SArtem Belevich : LLVMTargetMachine(T, computeDataLayout(is64bit, UseShortPointersOpt), TT,
1212f348ea1SArtem Belevich CPU, FS, Options, Reloc::PIC_,
122ca29c271SDavid Green getEffectiveCodeModel(CM, CodeModel::Small), OL),
1232f348ea1SArtem Belevich is64bit(is64bit), UseShortPointers(UseShortPointersOpt),
1240eaee545SJonas Devlieghere TLOF(std::make_unique<NVPTXTargetObjectFile>()),
125adcd0268SBenjamin Kramer Subtarget(TT, std::string(CPU), std::string(FS), *this) {
1263e5de88dSDaniel Sanders if (TT.getOS() == Triple::NVCL)
1276aad8b18SEric Christopher drvInterface = NVPTX::NVCL;
1286aad8b18SEric Christopher else
1296aad8b18SEric Christopher drvInterface = NVPTX::CUDA;
1308f9f0269STim Shen if (!DisableRequireStructuredCFG)
1318f9f0269STim Shen setRequiresStructuredCFG(true);
132227144c2SRafael Espindola initAsmInfo();
133227144c2SRafael Espindola }
134ae556d3eSJustin Holewinski
135c9f1f6b8SEugene Zelenko NVPTXTargetMachine::~NVPTXTargetMachine() = default;
136357600eaSReid Kleckner
anchor()137ae556d3eSJustin Holewinski void NVPTXTargetMachine32::anchor() {}
138ae556d3eSJustin Holewinski
NVPTXTargetMachine32(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,Optional<Reloc::Model> RM,Optional<CodeModel::Model> CM,CodeGenOpt::Level OL,bool JIT)1393e5de88dSDaniel Sanders NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, const Triple &TT,
1403e5de88dSDaniel Sanders StringRef CPU, StringRef FS,
1413e5de88dSDaniel Sanders const TargetOptions &Options,
1428c34dd82SRafael Espindola Optional<Reloc::Model> RM,
14379e238afSRafael Espindola Optional<CodeModel::Model> CM,
14479e238afSRafael Espindola CodeGenOpt::Level OL, bool JIT)
1450497ab14SJustin Holewinski : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
146ae556d3eSJustin Holewinski
anchor()147ae556d3eSJustin Holewinski void NVPTXTargetMachine64::anchor() {}
148ae556d3eSJustin Holewinski
NVPTXTargetMachine64(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,const TargetOptions & Options,Optional<Reloc::Model> RM,Optional<CodeModel::Model> CM,CodeGenOpt::Level OL,bool JIT)1493e5de88dSDaniel Sanders NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, const Triple &TT,
1503e5de88dSDaniel Sanders StringRef CPU, StringRef FS,
1513e5de88dSDaniel Sanders const TargetOptions &Options,
1528c34dd82SRafael Espindola Optional<Reloc::Model> RM,
15379e238afSRafael Espindola Optional<CodeModel::Model> CM,
15479e238afSRafael Espindola CodeGenOpt::Level OL, bool JIT)
1550497ab14SJustin Holewinski : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
156ae556d3eSJustin Holewinski
157d78bb468SBenjamin Kramer namespace {
158c9f1f6b8SEugene Zelenko
159ae556d3eSJustin Holewinski class NVPTXPassConfig : public TargetPassConfig {
160ae556d3eSJustin Holewinski public:
NVPTXPassConfig(NVPTXTargetMachine & TM,PassManagerBase & PM)1615e394c3dSMatthias Braun NVPTXPassConfig(NVPTXTargetMachine &TM, PassManagerBase &PM)
162ae556d3eSJustin Holewinski : TargetPassConfig(TM, PM) {}
163ae556d3eSJustin Holewinski
getNVPTXTargetMachine() const164ae556d3eSJustin Holewinski NVPTXTargetMachine &getNVPTXTargetMachine() const {
165ae556d3eSJustin Holewinski return getTM<NVPTXTargetMachine>();
166ae556d3eSJustin Holewinski }
167ae556d3eSJustin Holewinski
1682865c986SCraig Topper void addIRPasses() override;
1692865c986SCraig Topper bool addInstSelector() override;
17049fac56eSJustin Lebar void addPreRegAlloc() override;
1717e37a5f5SMatthias Braun void addPostRegAlloc() override;
1726dca8398SJustin Holewinski void addMachineSSAOptimization() override;
173dbb3b2f4SJustin Holewinski
1742865c986SCraig Topper FunctionPass *createTargetRegisterAllocator(bool) override;
175cf55a657SMatt Arsenault void addFastRegAlloc() override;
176cf55a657SMatt Arsenault void addOptimizedRegAlloc() override;
177cf55a657SMatt Arsenault
addRegAssignAndRewriteFast()178c9122ddeSMatt Arsenault bool addRegAssignAndRewriteFast() override {
179cf55a657SMatt Arsenault llvm_unreachable("should not be used");
180cf55a657SMatt Arsenault }
181cf55a657SMatt Arsenault
addRegAssignAndRewriteOptimized()182c9122ddeSMatt Arsenault bool addRegAssignAndRewriteOptimized() override {
183cf55a657SMatt Arsenault llvm_unreachable("should not be used");
184cf55a657SMatt Arsenault }
1856a3fdecaSJingyue Wu
1866a3fdecaSJingyue Wu private:
187f650441bSJingyue Wu // If the opt level is aggressive, add GVN; otherwise, add EarlyCSE. This
188f650441bSJingyue Wu // function is only called in opt mode.
1896a3fdecaSJingyue Wu void addEarlyCSEOrGVNPass();
190f650441bSJingyue Wu
191f650441bSJingyue Wu // Add passes that propagate special memory spaces.
1921375560bSJingyue Wu void addAddressSpaceInferencePasses();
193f650441bSJingyue Wu
194f650441bSJingyue Wu // Add passes that perform straight-line scalar optimizations.
195f650441bSJingyue Wu void addStraightLineScalarOptimizationPasses();
196ae556d3eSJustin Holewinski };
197c9f1f6b8SEugene Zelenko
198d78bb468SBenjamin Kramer } // end anonymous namespace
199ae556d3eSJustin Holewinski
createPassConfig(PassManagerBase & PM)200ae556d3eSJustin Holewinski TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
2015e394c3dSMatthias Braun return new NVPTXPassConfig(*this, PM);
202ae556d3eSJustin Holewinski }
203ae556d3eSJustin Holewinski
adjustPassManager(PassManagerBuilder & Builder)20481598117SStanislav Mekhanoshin void NVPTXTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
20581598117SStanislav Mekhanoshin Builder.addExtension(
20681598117SStanislav Mekhanoshin PassManagerBuilder::EP_EarlyAsPossible,
20781598117SStanislav Mekhanoshin [&](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
2080a11b636SArtem Belevich PM.add(createNVVMReflectPass(Subtarget.getSmVersion()));
20949e9a812SArtem Belevich PM.add(createNVVMIntrRangePass(Subtarget.getSmVersion()));
21081598117SStanislav Mekhanoshin });
2117cdbce59SJustin Lebar }
2127cdbce59SJustin Lebar
registerPassBuilderCallbacks(PassBuilder & PB)21334a8a437SArthur Eubanks void NVPTXTargetMachine::registerPassBuilderCallbacks(PassBuilder &PB) {
2149ccf13c3SArthur Eubanks PB.registerPipelineParsingCallback(
2159ccf13c3SArthur Eubanks [](StringRef PassName, FunctionPassManager &PM,
2169ccf13c3SArthur Eubanks ArrayRef<PassBuilder::PipelineElement>) {
2179ccf13c3SArthur Eubanks if (PassName == "nvvm-reflect") {
2189ccf13c3SArthur Eubanks PM.addPass(NVVMReflectPass());
2199ccf13c3SArthur Eubanks return true;
2209ccf13c3SArthur Eubanks }
2219ccf13c3SArthur Eubanks if (PassName == "nvvm-intr-range") {
2229ccf13c3SArthur Eubanks PM.addPass(NVVMIntrRangePass());
2239ccf13c3SArthur Eubanks return true;
2249ccf13c3SArthur Eubanks }
2259ccf13c3SArthur Eubanks return false;
2269ccf13c3SArthur Eubanks });
2279ccf13c3SArthur Eubanks
228e84a4650SArthur Eubanks PB.registerPipelineStartEPCallback(
2297a797b29STarindu Jayatilaka [this](ModulePassManager &PM, OptimizationLevel Level) {
23034a8a437SArthur Eubanks FunctionPassManager FPM;
231e84a4650SArthur Eubanks FPM.addPass(NVVMReflectPass(Subtarget.getSmVersion()));
232e84a4650SArthur Eubanks // FIXME: NVVMIntrRangePass is causing numerical discrepancies,
233e84a4650SArthur Eubanks // investigate and re-enable.
234526c0955SArthur Eubanks // FPM.addPass(NVVMIntrRangePass(Subtarget.getSmVersion()));
235e84a4650SArthur Eubanks PM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
236e84a4650SArthur Eubanks });
2379ccf13c3SArthur Eubanks }
2389ccf13c3SArthur Eubanks
23926d11ca4SSanjoy Das TargetTransformInfo
getTargetTransformInfo(const Function & F) const240c4b1a63aSJameson Nash NVPTXTargetMachine::getTargetTransformInfo(const Function &F) const {
2415010ebf1SMehdi Amini return TargetTransformInfo(NVPTXTTIImpl(this, F));
2420c981bd7SJingyue Wu }
2430c981bd7SJingyue Wu
244bf225939SMichael Liao std::pair<const Value *, unsigned>
getPredicatedAddrSpace(const Value * V) const245bf225939SMichael Liao NVPTXTargetMachine::getPredicatedAddrSpace(const Value *V) const {
246bf225939SMichael Liao if (auto *II = dyn_cast<IntrinsicInst>(V)) {
247bf225939SMichael Liao switch (II->getIntrinsicID()) {
248bf225939SMichael Liao case Intrinsic::nvvm_isspacep_const:
249bf225939SMichael Liao return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_CONST);
250bf225939SMichael Liao case Intrinsic::nvvm_isspacep_global:
251bf225939SMichael Liao return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_GLOBAL);
252bf225939SMichael Liao case Intrinsic::nvvm_isspacep_local:
253bf225939SMichael Liao return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_LOCAL);
254bf225939SMichael Liao case Intrinsic::nvvm_isspacep_shared:
255bf225939SMichael Liao return std::make_pair(II->getArgOperand(0), llvm::ADDRESS_SPACE_SHARED);
256bf225939SMichael Liao default:
257bf225939SMichael Liao break;
258bf225939SMichael Liao }
259bf225939SMichael Liao }
260bf225939SMichael Liao return std::make_pair(nullptr, -1);
261bf225939SMichael Liao }
262bf225939SMichael Liao
addEarlyCSEOrGVNPass()2636a3fdecaSJingyue Wu void NVPTXPassConfig::addEarlyCSEOrGVNPass() {
2646a3fdecaSJingyue Wu if (getOptLevel() == CodeGenOpt::Aggressive)
2656a3fdecaSJingyue Wu addPass(createGVNPass());
2666a3fdecaSJingyue Wu else
2676a3fdecaSJingyue Wu addPass(createEarlyCSEPass());
2686a3fdecaSJingyue Wu }
2696a3fdecaSJingyue Wu
addAddressSpaceInferencePasses()2701375560bSJingyue Wu void NVPTXPassConfig::addAddressSpaceInferencePasses() {
2717e9c9a65SArtem Belevich // NVPTXLowerArgs emits alloca for byval parameters which can often
272cd3afea4SJingyue Wu // be eliminated by SROA.
2732e4d1dd0SJingyue Wu addPass(createSROAPass());
274cd3afea4SJingyue Wu addPass(createNVPTXLowerAllocaPass());
275850657a4SMatt Arsenault addPass(createInferAddressSpacesPass());
2767aa3cad4SWilliam S. Moses addPass(createNVPTXAtomicLowerPass());
2771375560bSJingyue Wu }
2786a3fdecaSJingyue Wu
addStraightLineScalarOptimizationPasses()279f650441bSJingyue Wu void NVPTXPassConfig::addStraightLineScalarOptimizationPasses() {
280a108a65dSEli Bendersky addPass(createSeparateConstOffsetFromGEPPass());
281e7981ceeSJingyue Wu addPass(createSpeculativeExecutionPass());
2823286ec14SJingyue Wu // ReassociateGEPs exposes more opportunites for SLSR. See
2833286ec14SJingyue Wu // the example in reassociate-geps-and-slsr.ll.
2843286ec14SJingyue Wu addPass(createStraightLineStrengthReducePass());
2853286ec14SJingyue Wu // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
2863286ec14SJingyue Wu // EarlyCSE can reuse. GVN generates significantly better code than EarlyCSE
2873286ec14SJingyue Wu // for some of our benchmarks.
2886a3fdecaSJingyue Wu addEarlyCSEOrGVNPass();
28972fca6c8SJingyue Wu // Run NaryReassociate after EarlyCSE/GVN to be more effective.
29072fca6c8SJingyue Wu addPass(createNaryReassociatePass());
291c2a01469SJingyue Wu // NaryReassociate on GEPs creates redundant common expressions, so run
292c2a01469SJingyue Wu // EarlyCSE after it.
293c2a01469SJingyue Wu addPass(createEarlyCSEPass());
294f650441bSJingyue Wu }
295f650441bSJingyue Wu
addIRPasses()296f650441bSJingyue Wu void NVPTXPassConfig::addIRPasses() {
297f650441bSJingyue Wu // The following passes are known to not play well with virtual regs hanging
298f650441bSJingyue Wu // around after register allocation (which in our case, is *all* registers).
299f650441bSJingyue Wu // We explicitly disable them here. We do, however, need some functionality
300f650441bSJingyue Wu // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
301f650441bSJingyue Wu // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
302f650441bSJingyue Wu disablePass(&PrologEpilogCodeInserterID);
303f650441bSJingyue Wu disablePass(&MachineCopyPropagationID);
304f650441bSJingyue Wu disablePass(&TailDuplicateID);
305ad154c83SDerek Schuff disablePass(&StackMapLivenessID);
306ad154c83SDerek Schuff disablePass(&LiveDebugValuesID);
3077ab1b32bSJun Bum Lim disablePass(&PostRAMachineSinkingID);
308ad154c83SDerek Schuff disablePass(&PostRASchedulerID);
309ad154c83SDerek Schuff disablePass(&FuncletLayoutID);
310fe71ec77SSanjoy Das disablePass(&PatchableFunctionID);
3117ab1b32bSJun Bum Lim disablePass(&ShrinkWrapID);
312f650441bSJingyue Wu
3137cdbce59SJustin Lebar // NVVMReflectPass is added in addEarlyAsPossiblePasses, so hopefully running
3147cdbce59SJustin Lebar // it here does nothing. But since we need it for correctness when lowering
3157cdbce59SJustin Lebar // to NVPTX, run it here too, in case whoever built our pass pipeline didn't
3167cdbce59SJustin Lebar // call addEarlyAsPossiblePasses.
3170a11b636SArtem Belevich const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
3180a11b636SArtem Belevich addPass(createNVVMReflectPass(ST.getSmVersion()));
3197cdbce59SJustin Lebar
320f650441bSJingyue Wu if (getOptLevel() != CodeGenOpt::None)
321f650441bSJingyue Wu addPass(createNVPTXImageOptimizerPass());
322f650441bSJingyue Wu addPass(createNVPTXAssignValidGlobalNamesPass());
323f650441bSJingyue Wu addPass(createGenericToNVVMPass());
324f650441bSJingyue Wu
3257e9c9a65SArtem Belevich // NVPTXLowerArgs is required for correctness and should be run right
326c1b9d47bSJingyue Wu // before the address space inference passes.
3277e9c9a65SArtem Belevich addPass(createNVPTXLowerArgsPass(&getNVPTXTargetMachine()));
328f650441bSJingyue Wu if (getOptLevel() != CodeGenOpt::None) {
3291375560bSJingyue Wu addAddressSpaceInferencePasses();
330f650441bSJingyue Wu addStraightLineScalarOptimizationPasses();
331f650441bSJingyue Wu }
3326a3fdecaSJingyue Wu
333*ecf5b780SShilei Tian addPass(createAtomicExpandPass());
334*ecf5b780SShilei Tian
3356a3fdecaSJingyue Wu // === LSR and other generic IR passes ===
3366a3fdecaSJingyue Wu TargetPassConfig::addIRPasses();
3376a3fdecaSJingyue Wu // EarlyCSE is not always strong enough to clean up what LSR produces. For
3386a3fdecaSJingyue Wu // example, GVN can combine
3396a3fdecaSJingyue Wu //
3406a3fdecaSJingyue Wu // %0 = add %a, %b
3416a3fdecaSJingyue Wu // %1 = add %b, %a
3426a3fdecaSJingyue Wu //
3436a3fdecaSJingyue Wu // and
3446a3fdecaSJingyue Wu //
3456a3fdecaSJingyue Wu // %0 = shl nsw %a, 2
3466a3fdecaSJingyue Wu // %1 = shl %a, 2
3476a3fdecaSJingyue Wu //
3486a3fdecaSJingyue Wu // but EarlyCSE can do neither of them.
349019ab61eSFrederic Bastien if (getOptLevel() != CodeGenOpt::None) {
3506a3fdecaSJingyue Wu addEarlyCSEOrGVNPass();
351019ab61eSFrederic Bastien if (!DisableLoadStoreVectorizer)
352019ab61eSFrederic Bastien addPass(createLoadStoreVectorizerPass());
353b6b7fe60SArtem Belevich addPass(createSROAPass());
354019ab61eSFrederic Bastien }
35501f89f04SJustin Holewinski }
35601f89f04SJustin Holewinski
addInstSelector()357ae556d3eSJustin Holewinski bool NVPTXPassConfig::addInstSelector() {
3585c3dffc4SEric Christopher const NVPTXSubtarget &ST = *getTM<NVPTXTargetMachine>().getSubtargetImpl();
35930d56a7bSJustin Holewinski
360bbd38dd9SBob Wilson addPass(createLowerAggrCopies());
361bbd38dd9SBob Wilson addPass(createAllocaHoisting());
362bbd38dd9SBob Wilson addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
36330d56a7bSJustin Holewinski
36430d56a7bSJustin Holewinski if (!ST.hasImageHandles())
36530d56a7bSJustin Holewinski addPass(createNVPTXReplaceImageHandlesPass());
36630d56a7bSJustin Holewinski
367ae556d3eSJustin Holewinski return false;
368ae556d3eSJustin Holewinski }
369ae556d3eSJustin Holewinski
addPreRegAlloc()37049fac56eSJustin Lebar void NVPTXPassConfig::addPreRegAlloc() {
37149fac56eSJustin Lebar // Remove Proxy Register pseudo instructions used to keep `callseq_end` alive.
37249fac56eSJustin Lebar addPass(createNVPTXProxyRegErasurePass());
37349fac56eSJustin Lebar }
37449fac56eSJustin Lebar
addPostRegAlloc()3757e37a5f5SMatthias Braun void NVPTXPassConfig::addPostRegAlloc() {
376012248b0SJay Foad addPass(createNVPTXPrologEpilogPass());
377c1b9d47bSJingyue Wu if (getOptLevel() != CodeGenOpt::None) {
37877b5b385SJingyue Wu // NVPTXPrologEpilogPass calculates frame object offset and replace frame
37977b5b385SJingyue Wu // index with VRFrame register. NVPTXPeephole need to be run after that and
38077b5b385SJingyue Wu // will replace VRFrame with VRFrameLocal when possible.
38177b5b385SJingyue Wu addPass(createNVPTXPeephole());
382dbb3b2f4SJustin Holewinski }
383c1b9d47bSJingyue Wu }
384dbb3b2f4SJustin Holewinski
createTargetRegisterAllocator(bool)385fae7ff12SBenjamin Kramer FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
386062a2baeSCraig Topper return nullptr; // No reg alloc
387fae7ff12SBenjamin Kramer }
388fae7ff12SBenjamin Kramer
addFastRegAlloc()389cf55a657SMatt Arsenault void NVPTXPassConfig::addFastRegAlloc() {
390a51418c1SJustin Holewinski addPass(&PHIEliminationID);
391a51418c1SJustin Holewinski addPass(&TwoAddressInstructionPassID);
392dbb3b2f4SJustin Holewinski }
393dbb3b2f4SJustin Holewinski
addOptimizedRegAlloc()394cf55a657SMatt Arsenault void NVPTXPassConfig::addOptimizedRegAlloc() {
395a51418c1SJustin Holewinski addPass(&ProcessImplicitDefsID);
396a51418c1SJustin Holewinski addPass(&LiveVariablesID);
397a51418c1SJustin Holewinski addPass(&MachineLoopInfoID);
398a51418c1SJustin Holewinski addPass(&PHIEliminationID);
399a51418c1SJustin Holewinski
400a51418c1SJustin Holewinski addPass(&TwoAddressInstructionPassID);
401a51418c1SJustin Holewinski addPass(&RegisterCoalescerID);
402a51418c1SJustin Holewinski
403a51418c1SJustin Holewinski // PreRA instruction scheduling.
404a51418c1SJustin Holewinski if (addPass(&MachineSchedulerID))
405a51418c1SJustin Holewinski printAndVerify("After Machine Scheduling");
406a51418c1SJustin Holewinski
407a51418c1SJustin Holewinski
408a51418c1SJustin Holewinski addPass(&StackSlotColoringID);
409a51418c1SJustin Holewinski
410a51418c1SJustin Holewinski // FIXME: Needs physical registers
4114a7c8e7aSMatthias Braun //addPass(&MachineLICMID);
412a51418c1SJustin Holewinski
413a51418c1SJustin Holewinski printAndVerify("After StackSlotColoring");
414dbb3b2f4SJustin Holewinski }
4156dca8398SJustin Holewinski
addMachineSSAOptimization()4166dca8398SJustin Holewinski void NVPTXPassConfig::addMachineSSAOptimization() {
4176dca8398SJustin Holewinski // Pre-ra tail duplication.
4186dca8398SJustin Holewinski if (addPass(&EarlyTailDuplicateID))
4196dca8398SJustin Holewinski printAndVerify("After Pre-RegAlloc TailDuplicate");
4206dca8398SJustin Holewinski
4216dca8398SJustin Holewinski // Optimize PHIs before DCE: removing dead PHI cycles may make more
4226dca8398SJustin Holewinski // instructions dead.
4236dca8398SJustin Holewinski addPass(&OptimizePHIsID);
4246dca8398SJustin Holewinski
4256dca8398SJustin Holewinski // This pass merges large allocas. StackSlotColoring is a different pass
4266dca8398SJustin Holewinski // which merges spill slots.
4276dca8398SJustin Holewinski addPass(&StackColoringID);
4286dca8398SJustin Holewinski
4296dca8398SJustin Holewinski // If the target requests it, assign local variables to stack slots relative
4306dca8398SJustin Holewinski // to one another and simplify frame index references where possible.
4316dca8398SJustin Holewinski addPass(&LocalStackSlotAllocationID);
4326dca8398SJustin Holewinski
4336dca8398SJustin Holewinski // With optimization, dead code should already be eliminated. However
4346dca8398SJustin Holewinski // there is one known exception: lowered code for arguments that are only
4356dca8398SJustin Holewinski // used by tail calls, where the tail calls reuse the incoming stack
4366dca8398SJustin Holewinski // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
4376dca8398SJustin Holewinski addPass(&DeadMachineInstructionElimID);
4386dca8398SJustin Holewinski printAndVerify("After codegen DCE pass");
4396dca8398SJustin Holewinski
4406dca8398SJustin Holewinski // Allow targets to insert passes that improve instruction level parallelism,
4416dca8398SJustin Holewinski // like if-conversion. Such passes will typically need dominator trees and
4426dca8398SJustin Holewinski // loop info, just like LICM and CSE below.
4436dca8398SJustin Holewinski if (addILPOpts())
4446dca8398SJustin Holewinski printAndVerify("After ILP optimizations");
4456dca8398SJustin Holewinski
4464a7c8e7aSMatthias Braun addPass(&EarlyMachineLICMID);
4476dca8398SJustin Holewinski addPass(&MachineCSEID);
4486dca8398SJustin Holewinski
4496dca8398SJustin Holewinski addPass(&MachineSinkingID);
4506dca8398SJustin Holewinski printAndVerify("After Machine LICM, CSE and Sinking passes");
4516dca8398SJustin Holewinski
4526dca8398SJustin Holewinski addPass(&PeepholeOptimizerID);
4536dca8398SJustin Holewinski printAndVerify("After codegen peephole optimization pass");
4546dca8398SJustin Holewinski }
455