18f0fd8f6SDimitry Andric //===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
28f0fd8f6SDimitry Andric //
38f0fd8f6SDimitry Andric //                     The LLVM Compiler Infrastructure
48f0fd8f6SDimitry Andric //
58f0fd8f6SDimitry Andric // This file is distributed under the University of Illinois Open Source
68f0fd8f6SDimitry Andric // License. See LICENSE.TXT for details.
78f0fd8f6SDimitry Andric //
88f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
98f0fd8f6SDimitry Andric //
108f0fd8f6SDimitry Andric /// \file
114ba319b5SDimitry Andric /// The AMDGPU target machine contains all of the hardware specific
/// information needed to emit code for R600 and SI GPUs.
138f0fd8f6SDimitry Andric //
148f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
158f0fd8f6SDimitry Andric 
168f0fd8f6SDimitry Andric #include "AMDGPUTargetMachine.h"
178f0fd8f6SDimitry Andric #include "AMDGPU.h"
187a7e6055SDimitry Andric #include "AMDGPUAliasAnalysis.h"
193ca95b02SDimitry Andric #include "AMDGPUCallLowering.h"
207a7e6055SDimitry Andric #include "AMDGPUInstructionSelector.h"
217a7e6055SDimitry Andric #include "AMDGPULegalizerInfo.h"
22c4394386SDimitry Andric #include "AMDGPUMacroFusion.h"
233ca95b02SDimitry Andric #include "AMDGPUTargetObjectFile.h"
248f0fd8f6SDimitry Andric #include "AMDGPUTargetTransformInfo.h"
257a7e6055SDimitry Andric #include "GCNIterativeScheduler.h"
26d88c1a5aSDimitry Andric #include "GCNSchedStrategy.h"
278f0fd8f6SDimitry Andric #include "R600MachineScheduler.h"
28d88c1a5aSDimitry Andric #include "SIMachineScheduler.h"
293ca95b02SDimitry Andric #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
30db17bf38SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
317a7e6055SDimitry Andric #include "llvm/CodeGen/GlobalISel/Legalizer.h"
327a7e6055SDimitry Andric #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
338f0fd8f6SDimitry Andric #include "llvm/CodeGen/Passes.h"
343ca95b02SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
35d88c1a5aSDimitry Andric #include "llvm/IR/Attributes.h"
36d88c1a5aSDimitry Andric #include "llvm/IR/Function.h"
37d88c1a5aSDimitry Andric #include "llvm/IR/LegacyPassManager.h"
38d88c1a5aSDimitry Andric #include "llvm/Pass.h"
39d88c1a5aSDimitry Andric #include "llvm/Support/CommandLine.h"
40d88c1a5aSDimitry Andric #include "llvm/Support/Compiler.h"
41db17bf38SDimitry Andric #include "llvm/Support/TargetRegistry.h"
424ba319b5SDimitry Andric #include "llvm/Target/TargetLoweringObjectFile.h"
43db17bf38SDimitry Andric #include "llvm/Transforms/IPO.h"
44db17bf38SDimitry Andric #include "llvm/Transforms/IPO/AlwaysInliner.h"
45db17bf38SDimitry Andric #include "llvm/Transforms/IPO/PassManagerBuilder.h"
46db17bf38SDimitry Andric #include "llvm/Transforms/Scalar.h"
47db17bf38SDimitry Andric #include "llvm/Transforms/Scalar/GVN.h"
48*b5893f02SDimitry Andric #include "llvm/Transforms/Utils.h"
49db17bf38SDimitry Andric #include "llvm/Transforms/Vectorize.h"
50d88c1a5aSDimitry Andric #include <memory>
518f0fd8f6SDimitry Andric 
528f0fd8f6SDimitry Andric using namespace llvm;
538f0fd8f6SDimitry Andric 
543ca95b02SDimitry Andric static cl::opt<bool> EnableR600StructurizeCFG(
553ca95b02SDimitry Andric   "r600-ir-structurize",
563ca95b02SDimitry Andric   cl::desc("Use StructurizeCFG IR pass"),
573ca95b02SDimitry Andric   cl::init(true));
583ca95b02SDimitry Andric 
593ca95b02SDimitry Andric static cl::opt<bool> EnableSROA(
603ca95b02SDimitry Andric   "amdgpu-sroa",
613ca95b02SDimitry Andric   cl::desc("Run SROA after promote alloca pass"),
623ca95b02SDimitry Andric   cl::ReallyHidden,
633ca95b02SDimitry Andric   cl::init(true));
643ca95b02SDimitry Andric 
657a7e6055SDimitry Andric static cl::opt<bool>
667a7e6055SDimitry Andric EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
677a7e6055SDimitry Andric                         cl::desc("Run early if-conversion"),
687a7e6055SDimitry Andric                         cl::init(false));
697a7e6055SDimitry Andric 
703ca95b02SDimitry Andric static cl::opt<bool> EnableR600IfConvert(
713ca95b02SDimitry Andric   "r600-if-convert",
723ca95b02SDimitry Andric   cl::desc("Use if conversion pass"),
733ca95b02SDimitry Andric   cl::ReallyHidden,
743ca95b02SDimitry Andric   cl::init(true));
753ca95b02SDimitry Andric 
763ca95b02SDimitry Andric // Option to disable vectorizer for tests.
773ca95b02SDimitry Andric static cl::opt<bool> EnableLoadStoreVectorizer(
783ca95b02SDimitry Andric   "amdgpu-load-store-vectorizer",
793ca95b02SDimitry Andric   cl::desc("Enable load store vectorizer"),
80d88c1a5aSDimitry Andric   cl::init(true),
81d88c1a5aSDimitry Andric   cl::Hidden);
82d88c1a5aSDimitry Andric 
834ba319b5SDimitry Andric // Option to control global loads scalarization
84d88c1a5aSDimitry Andric static cl::opt<bool> ScalarizeGlobal(
85d88c1a5aSDimitry Andric   "amdgpu-scalarize-global-loads",
86d88c1a5aSDimitry Andric   cl::desc("Enable global load scalarization"),
87c4394386SDimitry Andric   cl::init(true),
883ca95b02SDimitry Andric   cl::Hidden);
893ca95b02SDimitry Andric 
907a7e6055SDimitry Andric // Option to run internalize pass.
917a7e6055SDimitry Andric static cl::opt<bool> InternalizeSymbols(
927a7e6055SDimitry Andric   "amdgpu-internalize-symbols",
937a7e6055SDimitry Andric   cl::desc("Enable elimination of non-kernel functions and unused globals"),
947a7e6055SDimitry Andric   cl::init(false),
957a7e6055SDimitry Andric   cl::Hidden);
967a7e6055SDimitry Andric 
977a7e6055SDimitry Andric // Option to inline all early.
987a7e6055SDimitry Andric static cl::opt<bool> EarlyInlineAll(
997a7e6055SDimitry Andric   "amdgpu-early-inline-all",
1007a7e6055SDimitry Andric   cl::desc("Inline all functions early"),
1017a7e6055SDimitry Andric   cl::init(false),
1027a7e6055SDimitry Andric   cl::Hidden);
1037a7e6055SDimitry Andric 
1047a7e6055SDimitry Andric static cl::opt<bool> EnableSDWAPeephole(
1057a7e6055SDimitry Andric   "amdgpu-sdwa-peephole",
1067a7e6055SDimitry Andric   cl::desc("Enable SDWA peepholer"),
1077a7e6055SDimitry Andric   cl::init(true));
1087a7e6055SDimitry Andric 
109*b5893f02SDimitry Andric static cl::opt<bool> EnableDPPCombine(
110*b5893f02SDimitry Andric   "amdgpu-dpp-combine",
111*b5893f02SDimitry Andric   cl::desc("Enable DPP combiner"),
112*b5893f02SDimitry Andric   cl::init(false));
113*b5893f02SDimitry Andric 
1147a7e6055SDimitry Andric // Enable address space based alias analysis
1157a7e6055SDimitry Andric static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
1167a7e6055SDimitry Andric   cl::desc("Enable AMDGPU Alias Analysis"),
1177a7e6055SDimitry Andric   cl::init(true));
1187a7e6055SDimitry Andric 
1195517e702SDimitry Andric // Option to run late CFG structurizer
1202cab237bSDimitry Andric static cl::opt<bool, true> LateCFGStructurize(
1215517e702SDimitry Andric   "amdgpu-late-structurize",
1225517e702SDimitry Andric   cl::desc("Enable late CFG structurization"),
1232cab237bSDimitry Andric   cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
1242cab237bSDimitry Andric   cl::Hidden);
1252cab237bSDimitry Andric 
1264ba319b5SDimitry Andric static cl::opt<bool, true> EnableAMDGPUFunctionCalls(
1272cab237bSDimitry Andric   "amdgpu-function-calls",
1282cab237bSDimitry Andric   cl::desc("Enable AMDGPU function call support"),
1294ba319b5SDimitry Andric   cl::location(AMDGPUTargetMachine::EnableFunctionCalls),
1304ba319b5SDimitry Andric   cl::init(false),
1314ba319b5SDimitry Andric   cl::Hidden);
1322cab237bSDimitry Andric 
1332cab237bSDimitry Andric // Enable lib calls simplifications
1342cab237bSDimitry Andric static cl::opt<bool> EnableLibCallSimplify(
1352cab237bSDimitry Andric   "amdgpu-simplify-libcall",
1364ba319b5SDimitry Andric   cl::desc("Enable amdgpu library simplifications"),
1374ba319b5SDimitry Andric   cl::init(true),
1384ba319b5SDimitry Andric   cl::Hidden);
1394ba319b5SDimitry Andric 
1404ba319b5SDimitry Andric static cl::opt<bool> EnableLowerKernelArguments(
1414ba319b5SDimitry Andric   "amdgpu-ir-lower-kernel-arguments",
1424ba319b5SDimitry Andric   cl::desc("Lower kernel argument loads in IR pass"),
1432cab237bSDimitry Andric   cl::init(true),
1445517e702SDimitry Andric   cl::Hidden);
1455517e702SDimitry Andric 
146*b5893f02SDimitry Andric // Enable atomic optimization
147*b5893f02SDimitry Andric static cl::opt<bool> EnableAtomicOptimizations(
148*b5893f02SDimitry Andric   "amdgpu-atomic-optimizations",
149*b5893f02SDimitry Andric   cl::desc("Enable atomic optimizations"),
150*b5893f02SDimitry Andric   cl::init(false),
151*b5893f02SDimitry Andric   cl::Hidden);
152*b5893f02SDimitry Andric 
153*b5893f02SDimitry Andric // Enable Mode register optimization
154*b5893f02SDimitry Andric static cl::opt<bool> EnableSIModeRegisterPass(
155*b5893f02SDimitry Andric   "amdgpu-mode-register",
156*b5893f02SDimitry Andric   cl::desc("Enable mode register pass"),
157*b5893f02SDimitry Andric   cl::init(true),
158*b5893f02SDimitry Andric   cl::Hidden);
159*b5893f02SDimitry Andric 
LLVMInitializeAMDGPUTarget()1608f0fd8f6SDimitry Andric extern "C" void LLVMInitializeAMDGPUTarget() {
1618f0fd8f6SDimitry Andric   // Register the target
162d88c1a5aSDimitry Andric   RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
163d88c1a5aSDimitry Andric   RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
1647d523365SDimitry Andric 
1657d523365SDimitry Andric   PassRegistry *PR = PassRegistry::getPassRegistry();
1662cab237bSDimitry Andric   initializeR600ClauseMergePassPass(*PR);
1672cab237bSDimitry Andric   initializeR600ControlFlowFinalizerPass(*PR);
1682cab237bSDimitry Andric   initializeR600PacketizerPass(*PR);
1692cab237bSDimitry Andric   initializeR600ExpandSpecialInstrsPassPass(*PR);
1702cab237bSDimitry Andric   initializeR600VectorRegMergerPass(*PR);
1716ccc06f6SDimitry Andric   initializeGlobalISel(*PR);
1722cab237bSDimitry Andric   initializeAMDGPUDAGToDAGISelPass(*PR);
173*b5893f02SDimitry Andric   initializeGCNDPPCombinePass(*PR);
1747d523365SDimitry Andric   initializeSILowerI1CopiesPass(*PR);
1757d523365SDimitry Andric   initializeSIFixSGPRCopiesPass(*PR);
1767a7e6055SDimitry Andric   initializeSIFixVGPRCopiesPass(*PR);
177*b5893f02SDimitry Andric   initializeSIFixupVectorISelPass(*PR);
1787d523365SDimitry Andric   initializeSIFoldOperandsPass(*PR);
1797a7e6055SDimitry Andric   initializeSIPeepholeSDWAPass(*PR);
1803ca95b02SDimitry Andric   initializeSIShrinkInstructionsPass(*PR);
1812cab237bSDimitry Andric   initializeSIOptimizeExecMaskingPreRAPass(*PR);
1827d523365SDimitry Andric   initializeSILoadStoreOptimizerPass(*PR);
183*b5893f02SDimitry Andric   initializeAMDGPUFixFunctionBitcastsPass(*PR);
1846d97bb29SDimitry Andric   initializeAMDGPUAlwaysInlinePass(*PR);
1857d523365SDimitry Andric   initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
1867d523365SDimitry Andric   initializeAMDGPUAnnotateUniformValuesPass(*PR);
1872cab237bSDimitry Andric   initializeAMDGPUArgumentUsageInfoPass(*PR);
188*b5893f02SDimitry Andric   initializeAMDGPUAtomicOptimizerPass(*PR);
1894ba319b5SDimitry Andric   initializeAMDGPULowerKernelArgumentsPass(*PR);
1904ba319b5SDimitry Andric   initializeAMDGPULowerKernelAttributesPass(*PR);
1917a7e6055SDimitry Andric   initializeAMDGPULowerIntrinsicsPass(*PR);
1922cab237bSDimitry Andric   initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
1933ca95b02SDimitry Andric   initializeAMDGPUPromoteAllocaPass(*PR);
1943ca95b02SDimitry Andric   initializeAMDGPUCodeGenPreparePass(*PR);
1952cab237bSDimitry Andric   initializeAMDGPURewriteOutArgumentsPass(*PR);
196d88c1a5aSDimitry Andric   initializeAMDGPUUnifyMetadataPass(*PR);
1973ca95b02SDimitry Andric   initializeSIAnnotateControlFlowPass(*PR);
1987a7e6055SDimitry Andric   initializeSIInsertWaitcntsPass(*PR);
199*b5893f02SDimitry Andric   initializeSIModeRegisterPass(*PR);
2003ca95b02SDimitry Andric   initializeSIWholeQuadModePass(*PR);
2013ca95b02SDimitry Andric   initializeSILowerControlFlowPass(*PR);
202d88c1a5aSDimitry Andric   initializeSIInsertSkipsPass(*PR);
2032cab237bSDimitry Andric   initializeSIMemoryLegalizerPass(*PR);
2043ca95b02SDimitry Andric   initializeSIDebuggerInsertNopsPass(*PR);
205d88c1a5aSDimitry Andric   initializeSIOptimizeExecMaskingPass(*PR);
2062cab237bSDimitry Andric   initializeSIFixWWMLivenessPass(*PR);
2074ba319b5SDimitry Andric   initializeSIFormMemoryClausesPass(*PR);
2087a7e6055SDimitry Andric   initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
2097a7e6055SDimitry Andric   initializeAMDGPUAAWrapperPassPass(*PR);
210*b5893f02SDimitry Andric   initializeAMDGPUExternalAAWrapperPass(*PR);
2112cab237bSDimitry Andric   initializeAMDGPUUseNativeCallsPass(*PR);
2122cab237bSDimitry Andric   initializeAMDGPUSimplifyLibCallsPass(*PR);
2132cab237bSDimitry Andric   initializeAMDGPUInlinerPass(*PR);
2147d523365SDimitry Andric }
2157d523365SDimitry Andric 
// Build the target lowering object file used for object emission. The same
// implementation serves both R600 and GCN; TT is currently unused.
static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
  return llvm::make_unique<AMDGPUTargetObjectFile>();
}
2198f0fd8f6SDimitry Andric 
// Factory for the R600 custom machine scheduler (registered below as "r600").
static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
  return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
}
223d88c1a5aSDimitry Andric 
// Factory for the SI custom machine scheduler (registered below as "si").
static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
  return new SIScheduleDAGMI(C);
}
227d88c1a5aSDimitry Andric 
228d88c1a5aSDimitry Andric static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext * C)229d88c1a5aSDimitry Andric createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
230d88c1a5aSDimitry Andric   ScheduleDAGMILive *DAG =
2317a7e6055SDimitry Andric     new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
232d88c1a5aSDimitry Andric   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
233d88c1a5aSDimitry Andric   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
234c4394386SDimitry Andric   DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
235d88c1a5aSDimitry Andric   return DAG;
2368f0fd8f6SDimitry Andric }
2378f0fd8f6SDimitry Andric 
2387a7e6055SDimitry Andric static ScheduleDAGInstrs *
createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext * C)2397a7e6055SDimitry Andric createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
2407a7e6055SDimitry Andric   auto DAG = new GCNIterativeScheduler(C,
2417a7e6055SDimitry Andric     GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
2427a7e6055SDimitry Andric   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
2437a7e6055SDimitry Andric   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
2447a7e6055SDimitry Andric   return DAG;
2457a7e6055SDimitry Andric }
2467a7e6055SDimitry Andric 
createMinRegScheduler(MachineSchedContext * C)2477a7e6055SDimitry Andric static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
2487a7e6055SDimitry Andric   return new GCNIterativeScheduler(C,
2497a7e6055SDimitry Andric     GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
2507a7e6055SDimitry Andric }
2517a7e6055SDimitry Andric 
2522cab237bSDimitry Andric static ScheduleDAGInstrs *
createIterativeILPMachineScheduler(MachineSchedContext * C)2532cab237bSDimitry Andric createIterativeILPMachineScheduler(MachineSchedContext *C) {
2542cab237bSDimitry Andric   auto DAG = new GCNIterativeScheduler(C,
2552cab237bSDimitry Andric     GCNIterativeScheduler::SCHEDULE_ILP);
2562cab237bSDimitry Andric   DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
2572cab237bSDimitry Andric   DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
2582cab237bSDimitry Andric   DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
2592cab237bSDimitry Andric   return DAG;
2602cab237bSDimitry Andric }
2612cab237bSDimitry Andric 
2628f0fd8f6SDimitry Andric static MachineSchedRegistry
263444ed5c5SDimitry Andric R600SchedRegistry("r600", "Run R600's custom scheduler",
2648f0fd8f6SDimitry Andric                    createR600MachineScheduler);
2658f0fd8f6SDimitry Andric 
266444ed5c5SDimitry Andric static MachineSchedRegistry
267444ed5c5SDimitry Andric SISchedRegistry("si", "Run SI's custom scheduler",
268444ed5c5SDimitry Andric                 createSIMachineScheduler);
269444ed5c5SDimitry Andric 
270d88c1a5aSDimitry Andric static MachineSchedRegistry
271d88c1a5aSDimitry Andric GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
272d88c1a5aSDimitry Andric                              "Run GCN scheduler to maximize occupancy",
273d88c1a5aSDimitry Andric                              createGCNMaxOccupancyMachineScheduler);
274d88c1a5aSDimitry Andric 
2757a7e6055SDimitry Andric static MachineSchedRegistry
2767a7e6055SDimitry Andric IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
2777a7e6055SDimitry Andric   "Run GCN scheduler to maximize occupancy (experimental)",
2787a7e6055SDimitry Andric   createIterativeGCNMaxOccupancyMachineScheduler);
2797a7e6055SDimitry Andric 
2807a7e6055SDimitry Andric static MachineSchedRegistry
2817a7e6055SDimitry Andric GCNMinRegSchedRegistry("gcn-minreg",
2827a7e6055SDimitry Andric   "Run GCN iterative scheduler for minimal register usage (experimental)",
2837a7e6055SDimitry Andric   createMinRegScheduler);
2847a7e6055SDimitry Andric 
2852cab237bSDimitry Andric static MachineSchedRegistry
2862cab237bSDimitry Andric GCNILPSchedRegistry("gcn-ilp",
2872cab237bSDimitry Andric   "Run GCN iterative scheduler for ILP scheduling (experimental)",
2882cab237bSDimitry Andric   createIterativeILPMachineScheduler);
2892cab237bSDimitry Andric 
computeDataLayout(const Triple & TT)2903ca95b02SDimitry Andric static StringRef computeDataLayout(const Triple &TT) {
2913ca95b02SDimitry Andric   if (TT.getArch() == Triple::r600) {
2923ca95b02SDimitry Andric     // 32-bit pointers.
2932cab237bSDimitry Andric       return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
2944ba319b5SDimitry Andric              "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
2958f0fd8f6SDimitry Andric   }
2968f0fd8f6SDimitry Andric 
2973ca95b02SDimitry Andric   // 32-bit private, local, and region pointers. 64-bit global, constant and
2983ca95b02SDimitry Andric   // flat.
2994ba319b5SDimitry Andric     return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
3007a7e6055SDimitry Andric          "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
3014ba319b5SDimitry Andric          "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
3023ca95b02SDimitry Andric }
3038f0fd8f6SDimitry Andric 
3043ca95b02SDimitry Andric LLVM_READNONE
getGPUOrDefault(const Triple & TT,StringRef GPU)3053ca95b02SDimitry Andric static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
3063ca95b02SDimitry Andric   if (!GPU.empty())
3073ca95b02SDimitry Andric     return GPU;
3083ca95b02SDimitry Andric 
3093ca95b02SDimitry Andric   if (TT.getArch() == Triple::amdgcn)
3102cab237bSDimitry Andric     return "generic";
3113ca95b02SDimitry Andric 
3123ca95b02SDimitry Andric   return "r600";
3133ca95b02SDimitry Andric }
3143ca95b02SDimitry Andric 
// Choose the relocation model; the requested model RM is deliberately ignored.
static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
  // The AMDGPU toolchain only supports generating shared objects, so we
  // must always use PIC.
  return Reloc::PIC_;
}
3208f0fd8f6SDimitry Andric 
// Common base constructor for both the R600 and GCN target machines. Derives
// the data layout and default CPU from the triple, forces PIC relocation
// (see getEffectiveRelocModel), and defaults to the small code model when
// none is specified.
AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
                                         StringRef CPU, StringRef FS,
                                         TargetOptions Options,
                                         Optional<Reloc::Model> RM,
                                         Optional<CodeModel::Model> CM,
                                         CodeGenOpt::Level OptLevel)
    : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
                        FS, Options, getEffectiveRelocModel(RM),
                        getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
      TLOF(createTLOF(getTargetTriple())) {
  initAsmInfo();
}
3338f0fd8f6SDimitry Andric 
// Storage for the externally-stored cl::opt<bool, true> options; public
// statics so other parts of the backend can query them directly.
bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
bool AMDGPUTargetMachine::EnableFunctionCalls = false;

AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
3382cab237bSDimitry Andric 
getGPUName(const Function & F) const3393ca95b02SDimitry Andric StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
3403ca95b02SDimitry Andric   Attribute GPUAttr = F.getFnAttribute("target-cpu");
3413ca95b02SDimitry Andric   return GPUAttr.hasAttribute(Attribute::None) ?
3423ca95b02SDimitry Andric     getTargetCPU() : GPUAttr.getValueAsString();
3433ca95b02SDimitry Andric }
3443ca95b02SDimitry Andric 
getFeatureString(const Function & F) const3453ca95b02SDimitry Andric StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
3463ca95b02SDimitry Andric   Attribute FSAttr = F.getFnAttribute("target-features");
3473ca95b02SDimitry Andric 
3483ca95b02SDimitry Andric   return FSAttr.hasAttribute(Attribute::None) ?
3493ca95b02SDimitry Andric     getTargetFeatureString() :
3503ca95b02SDimitry Andric     FSAttr.getValueAsString();
3513ca95b02SDimitry Andric }
3523ca95b02SDimitry Andric 
3532cab237bSDimitry Andric /// Predicate for Internalize pass.
mustPreserveGV(const GlobalValue & GV)3542cab237bSDimitry Andric static bool mustPreserveGV(const GlobalValue &GV) {
3552cab237bSDimitry Andric   if (const Function *F = dyn_cast<Function>(&GV))
3562cab237bSDimitry Andric     return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
3572cab237bSDimitry Andric 
3582cab237bSDimitry Andric   return !GV.use_empty();
3592cab237bSDimitry Andric }
3602cab237bSDimitry Andric 
adjustPassManager(PassManagerBuilder & Builder)3617a7e6055SDimitry Andric void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
3627a7e6055SDimitry Andric   Builder.DivergentTarget = true;
3637a7e6055SDimitry Andric 
3642cab237bSDimitry Andric   bool EnableOpt = getOptLevel() > CodeGenOpt::None;
3652cab237bSDimitry Andric   bool Internalize = InternalizeSymbols;
3662cab237bSDimitry Andric   bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableAMDGPUFunctionCalls;
3672cab237bSDimitry Andric   bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
3682cab237bSDimitry Andric   bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
3692cab237bSDimitry Andric 
3702cab237bSDimitry Andric   if (EnableAMDGPUFunctionCalls) {
3712cab237bSDimitry Andric     delete Builder.Inliner;
3722cab237bSDimitry Andric     Builder.Inliner = createAMDGPUFunctionInliningPass();
3732cab237bSDimitry Andric   }
3742cab237bSDimitry Andric 
3757a7e6055SDimitry Andric   Builder.addExtension(
3767a7e6055SDimitry Andric     PassManagerBuilder::EP_ModuleOptimizerEarly,
3777a7e6055SDimitry Andric     [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
3787a7e6055SDimitry Andric                                          legacy::PassManagerBase &PM) {
3797a7e6055SDimitry Andric       if (AMDGPUAA) {
3807a7e6055SDimitry Andric         PM.add(createAMDGPUAAWrapperPass());
3817a7e6055SDimitry Andric         PM.add(createAMDGPUExternalAAWrapperPass());
3827a7e6055SDimitry Andric       }
383d88c1a5aSDimitry Andric       PM.add(createAMDGPUUnifyMetadataPass());
3847a7e6055SDimitry Andric       if (Internalize) {
3852cab237bSDimitry Andric         PM.add(createInternalizePass(mustPreserveGV));
3867a7e6055SDimitry Andric         PM.add(createGlobalDCEPass());
3877a7e6055SDimitry Andric       }
3887a7e6055SDimitry Andric       if (EarlyInline)
3897a7e6055SDimitry Andric         PM.add(createAMDGPUAlwaysInlinePass(false));
3907a7e6055SDimitry Andric   });
3917a7e6055SDimitry Andric 
3922cab237bSDimitry Andric   const auto &Opt = Options;
3937a7e6055SDimitry Andric   Builder.addExtension(
3947a7e6055SDimitry Andric     PassManagerBuilder::EP_EarlyAsPossible,
3952cab237bSDimitry Andric     [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
3962cab237bSDimitry Andric                                       legacy::PassManagerBase &PM) {
3977a7e6055SDimitry Andric       if (AMDGPUAA) {
3987a7e6055SDimitry Andric         PM.add(createAMDGPUAAWrapperPass());
3997a7e6055SDimitry Andric         PM.add(createAMDGPUExternalAAWrapperPass());
4007a7e6055SDimitry Andric       }
4012cab237bSDimitry Andric       PM.add(llvm::createAMDGPUUseNativeCallsPass());
4022cab237bSDimitry Andric       if (LibCallSimplify)
4032cab237bSDimitry Andric         PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
4047a7e6055SDimitry Andric   });
405edd7eaddSDimitry Andric 
406edd7eaddSDimitry Andric   Builder.addExtension(
407edd7eaddSDimitry Andric     PassManagerBuilder::EP_CGSCCOptimizerLate,
408edd7eaddSDimitry Andric     [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
409edd7eaddSDimitry Andric       // Add infer address spaces pass to the opt pipeline after inlining
410edd7eaddSDimitry Andric       // but before SROA to increase SROA opportunities.
411edd7eaddSDimitry Andric       PM.add(createInferAddressSpacesPass());
4124ba319b5SDimitry Andric 
4134ba319b5SDimitry Andric       // This should run after inlining to have any chance of doing anything,
4144ba319b5SDimitry Andric       // and before other cleanup optimizations.
4154ba319b5SDimitry Andric       PM.add(createAMDGPULowerKernelAttributesPass());
416edd7eaddSDimitry Andric   });
417d88c1a5aSDimitry Andric }
418d88c1a5aSDimitry Andric 
4198f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
4208f0fd8f6SDimitry Andric // R600 Target Machine (R600 -> Cayman)
4218f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
4228f0fd8f6SDimitry Andric 
// R600 target machine: identical to the base except that the R600 backend
// requires a structured CFG.
R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
                                     StringRef CPU, StringRef FS,
                                     TargetOptions Options,
                                     Optional<Reloc::Model> RM,
                                     Optional<CodeModel::Model> CM,
                                     CodeGenOpt::Level OL, bool JIT)
    : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
  setRequiresStructuredCFG(true);
}
4323ca95b02SDimitry Andric 
getSubtargetImpl(const Function & F) const4333ca95b02SDimitry Andric const R600Subtarget *R600TargetMachine::getSubtargetImpl(
4343ca95b02SDimitry Andric   const Function &F) const {
4353ca95b02SDimitry Andric   StringRef GPU = getGPUName(F);
4363ca95b02SDimitry Andric   StringRef FS = getFeatureString(F);
4373ca95b02SDimitry Andric 
4383ca95b02SDimitry Andric   SmallString<128> SubtargetKey(GPU);
4393ca95b02SDimitry Andric   SubtargetKey.append(FS);
4403ca95b02SDimitry Andric 
4413ca95b02SDimitry Andric   auto &I = SubtargetMap[SubtargetKey];
4423ca95b02SDimitry Andric   if (!I) {
4433ca95b02SDimitry Andric     // This needs to be done before we create a new subtarget since any
4443ca95b02SDimitry Andric     // creation will depend on the TM and the code generation flags on the
4453ca95b02SDimitry Andric     // function that reside in TargetOptions.
4463ca95b02SDimitry Andric     resetTargetOptions(F);
4473ca95b02SDimitry Andric     I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
4483ca95b02SDimitry Andric   }
4493ca95b02SDimitry Andric 
4503ca95b02SDimitry Andric   return I.get();
4513ca95b02SDimitry Andric }
4528f0fd8f6SDimitry Andric 
// Build the R600-specific TargetTransformInfo used for IR cost queries on F.
TargetTransformInfo
R600TargetMachine::getTargetTransformInfo(const Function &F) {
  return TargetTransformInfo(R600TTIImpl(this, F));
}
4574ba319b5SDimitry Andric 
4588f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
4598f0fd8f6SDimitry Andric // GCN Target Machine (SI+)
4608f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
4618f0fd8f6SDimitry Andric 
// GCN (SI+) target machine; all shared setup happens in the base constructor.
GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
                                   StringRef CPU, StringRef FS,
                                   TargetOptions Options,
                                   Optional<Reloc::Model> RM,
                                   Optional<CodeModel::Model> CM,
                                   CodeGenOpt::Level OL, bool JIT)
    : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
4693ca95b02SDimitry Andric 
getSubtargetImpl(const Function & F) const4704ba319b5SDimitry Andric const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
4713ca95b02SDimitry Andric   StringRef GPU = getGPUName(F);
4723ca95b02SDimitry Andric   StringRef FS = getFeatureString(F);
4733ca95b02SDimitry Andric 
4743ca95b02SDimitry Andric   SmallString<128> SubtargetKey(GPU);
4753ca95b02SDimitry Andric   SubtargetKey.append(FS);
4763ca95b02SDimitry Andric 
4773ca95b02SDimitry Andric   auto &I = SubtargetMap[SubtargetKey];
4783ca95b02SDimitry Andric   if (!I) {
4793ca95b02SDimitry Andric     // This needs to be done before we create a new subtarget since any
4803ca95b02SDimitry Andric     // creation will depend on the TM and the code generation flags on the
4813ca95b02SDimitry Andric     // function that reside in TargetOptions.
4823ca95b02SDimitry Andric     resetTargetOptions(F);
4834ba319b5SDimitry Andric     I = llvm::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
4843ca95b02SDimitry Andric   }
4853ca95b02SDimitry Andric 
486d88c1a5aSDimitry Andric   I->setScalarizeGlobalBehavior(ScalarizeGlobal);
487d88c1a5aSDimitry Andric 
4883ca95b02SDimitry Andric   return I.get();
4893ca95b02SDimitry Andric }
4908f0fd8f6SDimitry Andric 
// Build the GCN-specific TargetTransformInfo used for IR cost queries on F.
TargetTransformInfo
GCNTargetMachine::getTargetTransformInfo(const Function &F) {
  return TargetTransformInfo(GCNTTIImpl(this, F));
}
4954ba319b5SDimitry Andric 
4968f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
4978f0fd8f6SDimitry Andric // AMDGPU Pass Setup
4988f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
4998f0fd8f6SDimitry Andric 
namespace {

/// Pass configuration shared by the R600 and GCN backends.
class AMDGPUPassConfig : public TargetPassConfig {
public:
  AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {
    // Exceptions and StackMaps are not supported, so these passes will never do
    // anything.
    disablePass(&StackMapLivenessID);
    disablePass(&FuncletLayoutID);
  }

  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }

  // Default scheduler: generic live-interval scheduling extended with
  // load and store clustering mutations.
  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    return DAG;
  }

  // Helpers shared by the subclasses' IR pipelines.
  void addEarlyCSEOrGVNPass();
  void addStraightLineScalarOptimizationPasses();
  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addGCPasses() override;
};

/// Pass pipeline for the R600 family of GPUs.
class R600PassConfig final : public AMDGPUPassConfig {
public:
  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) {}

  ScheduleDAGInstrs *createMachineScheduler(
    MachineSchedContext *C) const override {
    return createR600MachineScheduler(C);
  }

  bool addPreISel() override;
  bool addInstSelector() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

/// Pass pipeline for GCN (SI and later) GPUs, including GlobalISel hooks.
class GCNPassConfig final : public AMDGPUPassConfig {
public:
  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) {
    // It is necessary to know the register usage of the entire call graph.  We
    // allow calls without EnableAMDGPUFunctionCalls if they are marked
    // noinline, so this is always required.
    setRequiresCodeGenSCCOrder(true);
  }

  GCNTargetMachine &getGCNTargetMachine() const {
    return getTM<GCNTargetMachine>();
  }

  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override;

  bool addPreISel() override;
  void addMachineSSAOptimization() override;
  bool addILPOpts() override;
  bool addInstSelector() override;
  // GlobalISel pipeline hooks.
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};

} // end anonymous namespace
5848f0fd8f6SDimitry Andric 
addEarlyCSEOrGVNPass()5853ca95b02SDimitry Andric void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
5863ca95b02SDimitry Andric   if (getOptLevel() == CodeGenOpt::Aggressive)
5873ca95b02SDimitry Andric     addPass(createGVNPass());
5883ca95b02SDimitry Andric   else
5893ca95b02SDimitry Andric     addPass(createEarlyCSEPass());
5903ca95b02SDimitry Andric }
5913ca95b02SDimitry Andric 
/// Scalar optimizations targeted at straight-line code (no control flow),
/// mainly address-computation strength reduction.  The pass order below is
/// deliberate: each cleanup pass mops up redundancies created by the one
/// before it.
void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(createLICMPass());
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunites for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
  // EarlyCSE can reuse.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(createEarlyCSEPass());
}
6083ca95b02SDimitry Andric 
/// IR-level pipeline shared by R600 and GCN.  The order here is significant;
/// see the inline comments for the individual constraints.
void AMDGPUPassConfig::addIRPasses() {
  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();

  // There is no reason to run these.
  // NOTE(review): StackMapLiveness/FuncletLayout are already disabled in the
  // AMDGPUPassConfig constructor; repeating them here is harmless.
  disablePass(&StackMapLivenessID);
  disablePass(&FuncletLayoutID);
  disablePass(&PatchableFunctionID);

  addPass(createAtomicExpandPass());

  // This must occur before inlining, as the inliner will not look through
  // bitcast calls.
  addPass(createAMDGPUFixFunctionBitcastsPass());

  addPass(createAMDGPULowerIntrinsicsPass());

  // Function calls are not supported, so make sure we inline everything.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerLegacyPass());
  // We need to add the barrier noop pass, otherwise adding the function
  // inlining pass will cause all of the PassConfigs passes to be run
  // one function at a time, which means if we have a module with two
  // functions, then we will generate code for the first function
  // without ever running any passes on the second.
  addPass(createBarrierNoopPass());

  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
    // TODO: May want to move later or split into an early and late one.

    addPass(createAMDGPUCodeGenPreparePass());
  }

  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
  if (TM.getTargetTriple().getArch() == Triple::r600)
    addPass(createR600OpenCLImageTypeLoweringPass());

  // Replace OpenCL enqueued block function pointers with global variables.
  addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());

  if (TM.getOptLevel() > CodeGenOpt::None) {
    addPass(createInferAddressSpacesPass());
    addPass(createAMDGPUPromoteAlloca());

    if (EnableSROA)
      addPass(createSROAPass());

    addStraightLineScalarOptimizationPasses();

    if (EnableAMDGPUAliasAnalysis) {
      // Register the AMDGPU-specific alias analysis and hook it into the
      // generic AA result stack via the external-AA wrapper.
      addPass(createAMDGPUAAWrapperPass());
      addPass(createExternalAAWrapperPass([](Pass &P, Function &,
                                             AAResults &AAR) {
        if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
          AAR.addAAResult(WrapperPass->getResult());
        }));
    }
  }

  TargetPassConfig::addIRPasses();

  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  //   %0 = add %a, %b
  //   %1 = add %b, %a
  //
  // and
  //
  //   %0 = shl nsw %a, 2
  //   %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  if (getOptLevel() != CodeGenOpt::None)
    addEarlyCSEOrGVNPass();
}
6848f0fd8f6SDimitry Andric 
void AMDGPUPassConfig::addCodeGenPrepare() {
  // On amdgcn, optionally lower kernel arguments before the generic
  // CodeGenPrepare (gated by the EnableLowerKernelArguments option).
  if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
      EnableLowerKernelArguments)
    addPass(createAMDGPULowerKernelArgumentsPass());

  TargetPassConfig::addCodeGenPrepare();

  // Optionally vectorize adjacent loads/stores after the generic preparation.
  if (EnableLoadStoreVectorizer)
    addPass(createLoadStoreVectorizerPass());
}
6958f0fd8f6SDimitry Andric 
bool AMDGPUPassConfig::addPreISel() {
  // Lower switch statements to branches, then flatten the CFG ahead of
  // instruction selection.
  addPass(createLowerSwitchPass());
  addPass(createFlattenCFGPass());
  return false;
}
7018f0fd8f6SDimitry Andric 
bool AMDGPUPassConfig::addInstSelector() {
  // Shared AMDGPU SelectionDAG instruction selector.
  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
  return false;
}
7068f0fd8f6SDimitry Andric 
bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}
7117d523365SDimitry Andric 
7128f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
7138f0fd8f6SDimitry Andric // R600 Pass Setup
7148f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
7158f0fd8f6SDimitry Andric 
bool R600PassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  // Optionally structurize the CFG (controlled by EnableR600StructurizeCFG).
  if (EnableR600StructurizeCFG)
    addPass(createStructurizeCFGPass());
  return false;
}
7238f0fd8f6SDimitry Andric 
bool R600PassConfig::addInstSelector() {
  // R600 has its own SelectionDAG instruction selector.
  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
  return false;
}
7282cab237bSDimitry Andric 
void R600PassConfig::addPreRegAlloc() {
  // Merge vector registers before register allocation.
  addPass(createR600VectorRegMerger());
}
7328f0fd8f6SDimitry Andric 
void R600PassConfig::addPreSched2() {
  // Clause markers must be emitted before the optional if-conversion, and
  // clause merging runs last.  (The 'false' argument skips verification
  // after each pass.)
  addPass(createR600EmitClauseMarkers(), false);
  if (EnableR600IfConvert)
    addPass(&IfConverterID, false);
  addPass(createR600ClauseMergePass(), false);
}
7398f0fd8f6SDimitry Andric 
void R600PassConfig::addPreEmitPass() {
  // Final R600 lowering sequence; the order below is significant.
  addPass(createAMDGPUCFGStructurizerPass(), false);
  addPass(createR600ExpandSpecialInstrsPass(), false);
  addPass(&FinalizeMachineBundlesID, false);
  addPass(createR600Packetizer(), false);
  addPass(createR600ControlFlowFinalizer(), false);
}
7478f0fd8f6SDimitry Andric 
/// Create the pass configuration object driving the R600 codegen pipeline.
TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
  return new R600PassConfig(*this, PM);
}
7518f0fd8f6SDimitry Andric 
7528f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
7538f0fd8f6SDimitry Andric // GCN Pass Setup
7548f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
7558f0fd8f6SDimitry Andric 
createMachineScheduler(MachineSchedContext * C) const7563ca95b02SDimitry Andric ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
7573ca95b02SDimitry Andric   MachineSchedContext *C) const {
7584ba319b5SDimitry Andric   const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
7593ca95b02SDimitry Andric   if (ST.enableSIScheduler())
7603ca95b02SDimitry Andric     return createSIMachineScheduler(C);
761d88c1a5aSDimitry Andric   return createGCNMaxOccupancyMachineScheduler(C);
7623ca95b02SDimitry Andric }
7633ca95b02SDimitry Andric 
bool GCNPassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  // Optional atomic optimization (controlled by EnableAtomicOptimizations).
  if (EnableAtomicOptimizations) {
    addPass(createAMDGPUAtomicOptimizerPass());
  }

  // FIXME: We need to run a pass to propagate the attributes when calls are
  // supported.
  addPass(createAMDGPUAnnotateKernelFeaturesPass());

  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
  // regions formed by them.
  addPass(&AMDGPUUnifyDivergentExitNodesID);
  if (!LateCFGStructurize) {
    addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
  }
  addPass(createSinkingPass());
  addPass(createAMDGPUAnnotateUniformValues());
  // Control-flow annotation is skipped when structurization is deferred to
  // the machine-level pass (LateCFGStructurize).
  if (!LateCFGStructurize) {
    addPass(createSIAnnotateControlFlowPass());
  }

  return false;
}
7898f0fd8f6SDimitry Andric 
void GCNPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();

  // We want to fold operands after PeepholeOptimizer has run (or as part of
  // it), because it will eliminate extra copies making it easier to fold the
  // real source operand. We want to eliminate dead instructions after, so that
  // we see fewer uses of the copies. We then need to clean up the dead
  // instructions leftover after the operands are folded as well.
  //
  // XXX - Can we get away without running DeadMachineInstructionElim again?
  addPass(&SIFoldOperandsID);
  if (EnableDPPCombine)
    addPass(&GCNDPPCombineID);
  addPass(&DeadMachineInstructionElimID);
  addPass(&SILoadStoreOptimizerID);
  if (EnableSDWAPeephole) {
    // After SDWA peepholing, re-run LICM/CSE/folding/DCE to clean up the
    // code it rewrote.
    addPass(&SIPeepholeSDWAID);
    addPass(&EarlyMachineLICMID);
    addPass(&MachineCSEID);
    addPass(&SIFoldOperandsID);
    addPass(&DeadMachineInstructionElimID);
  }
  addPass(createSIShrinkInstructionsPass());
}
8143ca95b02SDimitry Andric 
bool GCNPassConfig::addILPOpts() {
  // Optional early if-conversion (controlled by EnableEarlyIfConversion),
  // then the generic ILP optimizations.
  if (EnableEarlyIfConversion)
    addPass(&EarlyIfConverterID);

  TargetPassConfig::addILPOpts();
  return false;
}
8226c4bc1bdSDimitry Andric 
bool GCNPassConfig::addInstSelector() {
  // Shared SelectionDAG selection, followed by SI-specific post-ISel fixups.
  AMDGPUPassConfig::addInstSelector();
  addPass(&SIFixSGPRCopiesID);
  addPass(createSILowerI1CopiesPass());
  addPass(createSIFixupVectorISelPass());
  addPass(createSIAddIMGInitPass());
  return false;
}
8318f0fd8f6SDimitry Andric 
addIRTranslator()8323ca95b02SDimitry Andric bool GCNPassConfig::addIRTranslator() {
8333ca95b02SDimitry Andric   addPass(new IRTranslator());
8343ca95b02SDimitry Andric   return false;
8353ca95b02SDimitry Andric }
8368f0fd8f6SDimitry Andric 
addLegalizeMachineIR()837d88c1a5aSDimitry Andric bool GCNPassConfig::addLegalizeMachineIR() {
8387a7e6055SDimitry Andric   addPass(new Legalizer());
839d88c1a5aSDimitry Andric   return false;
840d88c1a5aSDimitry Andric }
841d88c1a5aSDimitry Andric 
addRegBankSelect()8423ca95b02SDimitry Andric bool GCNPassConfig::addRegBankSelect() {
8437a7e6055SDimitry Andric   addPass(new RegBankSelect());
8443ca95b02SDimitry Andric   return false;
8453ca95b02SDimitry Andric }
846d88c1a5aSDimitry Andric 
addGlobalInstructionSelect()847d88c1a5aSDimitry Andric bool GCNPassConfig::addGlobalInstructionSelect() {
8487a7e6055SDimitry Andric   addPass(new InstructionSelect());
849d88c1a5aSDimitry Andric   return false;
850d88c1a5aSDimitry Andric }
8517a7e6055SDimitry Andric 
void GCNPassConfig::addPreRegAlloc() {
  // When CFG structurization was deferred from IR (LateCFGStructurize), do it
  // now at the machine level.
  if (LateCFGStructurize) {
    addPass(createAMDGPUMachineCFGStructurizerPass());
  }
  addPass(createSIWholeQuadModePass());
}
8587d523365SDimitry Andric 
/// Insert SI control-flow passes around fast register allocation (-O0).
void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  // FIXME: We have to disable the verifier here because of PHIElimination +
  // TwoAddressInstructions disabling it.

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  // This must be run after SILowerControlFlow, since it needs to use the
  // machine-level CFG, but before register allocation.
  insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);

  TargetPassConfig::addFastRegAlloc(RegAllocPass);
}
8747d523365SDimitry Andric 
/// Insert SI-specific passes around the optimizing register allocator.
void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  // Run exec-mask pre-RA optimization after the machine scheduler, then form
  // memory clauses on top of its result.
  insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);

  insertPass(&SIOptimizeExecMaskingPreRAID, &SIFormMemoryClausesID);

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  // This must be run after SILowerControlFlow, since it needs to use the
  // machine-level CFG, but before register allocation.
  insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);

  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}
8918f0fd8f6SDimitry Andric 
void GCNPassConfig::addPostRegAlloc() {
  addPass(&SIFixVGPRCopiesID);
  // Exec-mask optimization only runs when optimizing (above -O0).
  if (getOptLevel() > CodeGenOpt::None)
    addPass(&SIOptimizeExecMaskingID);
  TargetPassConfig::addPostRegAlloc();
}
898d88c1a5aSDimitry Andric 
// Intentionally empty: GCN adds no passes before the post-RA scheduler.
void GCNPassConfig::addPreSched2() {
}
9018f0fd8f6SDimitry Andric 
/// Late GCN passes run just before machine code emission; the order below is
/// significant.
void GCNPassConfig::addPreEmitPass() {
  addPass(createSIMemoryLegalizerPass());
  addPass(createSIInsertWaitcntsPass());
  addPass(createSIShrinkInstructionsPass());
  addPass(createSIModeRegisterPass());

  // The hazard recognizer that runs as part of the post-ra scheduler does not
  // guarantee to be able handle all hazards correctly. This is because if there
  // are multiple scheduling regions in a basic block, the regions are scheduled
  // bottom up, so when we begin to schedule a region we don't know what
  // instructions were emitted directly before it.
  //
  // Here we add a stand-alone hazard recognizer pass which can handle all
  // cases.
  //
  // FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
  // be better for it to emit S_NOP <N> when possible.
  addPass(&PostRAHazardRecognizerID);

  addPass(&SIInsertSkipsPassID);
  addPass(createSIDebuggerInsertNopsPass());
  addPass(&BranchRelaxationPassID);
}
9258f0fd8f6SDimitry Andric 
/// Create the pass configuration object driving the GCN codegen pipeline.
TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
  return new GCNPassConfig(*this, PM);
}
929