//===-- AMDGPUTargetMachine.cpp - TargetMachine for hw codegen targets-----===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// The AMDGPU target machine contains all of the hardware specific
/// information needed to emit code for R600 and SI GPUs.
//
//===----------------------------------------------------------------------===//
158f0fd8f6SDimitry Andric
168f0fd8f6SDimitry Andric #include "AMDGPUTargetMachine.h"
178f0fd8f6SDimitry Andric #include "AMDGPU.h"
187a7e6055SDimitry Andric #include "AMDGPUAliasAnalysis.h"
193ca95b02SDimitry Andric #include "AMDGPUCallLowering.h"
207a7e6055SDimitry Andric #include "AMDGPUInstructionSelector.h"
217a7e6055SDimitry Andric #include "AMDGPULegalizerInfo.h"
22c4394386SDimitry Andric #include "AMDGPUMacroFusion.h"
233ca95b02SDimitry Andric #include "AMDGPUTargetObjectFile.h"
248f0fd8f6SDimitry Andric #include "AMDGPUTargetTransformInfo.h"
257a7e6055SDimitry Andric #include "GCNIterativeScheduler.h"
26d88c1a5aSDimitry Andric #include "GCNSchedStrategy.h"
278f0fd8f6SDimitry Andric #include "R600MachineScheduler.h"
28d88c1a5aSDimitry Andric #include "SIMachineScheduler.h"
293ca95b02SDimitry Andric #include "llvm/CodeGen/GlobalISel/IRTranslator.h"
30db17bf38SDimitry Andric #include "llvm/CodeGen/GlobalISel/InstructionSelect.h"
317a7e6055SDimitry Andric #include "llvm/CodeGen/GlobalISel/Legalizer.h"
327a7e6055SDimitry Andric #include "llvm/CodeGen/GlobalISel/RegBankSelect.h"
338f0fd8f6SDimitry Andric #include "llvm/CodeGen/Passes.h"
343ca95b02SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
35d88c1a5aSDimitry Andric #include "llvm/IR/Attributes.h"
36d88c1a5aSDimitry Andric #include "llvm/IR/Function.h"
37d88c1a5aSDimitry Andric #include "llvm/IR/LegacyPassManager.h"
38d88c1a5aSDimitry Andric #include "llvm/Pass.h"
39d88c1a5aSDimitry Andric #include "llvm/Support/CommandLine.h"
40d88c1a5aSDimitry Andric #include "llvm/Support/Compiler.h"
41db17bf38SDimitry Andric #include "llvm/Support/TargetRegistry.h"
424ba319b5SDimitry Andric #include "llvm/Target/TargetLoweringObjectFile.h"
43db17bf38SDimitry Andric #include "llvm/Transforms/IPO.h"
44db17bf38SDimitry Andric #include "llvm/Transforms/IPO/AlwaysInliner.h"
45db17bf38SDimitry Andric #include "llvm/Transforms/IPO/PassManagerBuilder.h"
46db17bf38SDimitry Andric #include "llvm/Transforms/Scalar.h"
47db17bf38SDimitry Andric #include "llvm/Transforms/Scalar/GVN.h"
48*b5893f02SDimitry Andric #include "llvm/Transforms/Utils.h"
49db17bf38SDimitry Andric #include "llvm/Transforms/Vectorize.h"
50d88c1a5aSDimitry Andric #include <memory>
518f0fd8f6SDimitry Andric
528f0fd8f6SDimitry Andric using namespace llvm;
538f0fd8f6SDimitry Andric
543ca95b02SDimitry Andric static cl::opt<bool> EnableR600StructurizeCFG(
553ca95b02SDimitry Andric "r600-ir-structurize",
563ca95b02SDimitry Andric cl::desc("Use StructurizeCFG IR pass"),
573ca95b02SDimitry Andric cl::init(true));
583ca95b02SDimitry Andric
593ca95b02SDimitry Andric static cl::opt<bool> EnableSROA(
603ca95b02SDimitry Andric "amdgpu-sroa",
613ca95b02SDimitry Andric cl::desc("Run SROA after promote alloca pass"),
623ca95b02SDimitry Andric cl::ReallyHidden,
633ca95b02SDimitry Andric cl::init(true));
643ca95b02SDimitry Andric
657a7e6055SDimitry Andric static cl::opt<bool>
667a7e6055SDimitry Andric EnableEarlyIfConversion("amdgpu-early-ifcvt", cl::Hidden,
677a7e6055SDimitry Andric cl::desc("Run early if-conversion"),
687a7e6055SDimitry Andric cl::init(false));
697a7e6055SDimitry Andric
703ca95b02SDimitry Andric static cl::opt<bool> EnableR600IfConvert(
713ca95b02SDimitry Andric "r600-if-convert",
723ca95b02SDimitry Andric cl::desc("Use if conversion pass"),
733ca95b02SDimitry Andric cl::ReallyHidden,
743ca95b02SDimitry Andric cl::init(true));
753ca95b02SDimitry Andric
763ca95b02SDimitry Andric // Option to disable vectorizer for tests.
773ca95b02SDimitry Andric static cl::opt<bool> EnableLoadStoreVectorizer(
783ca95b02SDimitry Andric "amdgpu-load-store-vectorizer",
793ca95b02SDimitry Andric cl::desc("Enable load store vectorizer"),
80d88c1a5aSDimitry Andric cl::init(true),
81d88c1a5aSDimitry Andric cl::Hidden);
82d88c1a5aSDimitry Andric
834ba319b5SDimitry Andric // Option to control global loads scalarization
84d88c1a5aSDimitry Andric static cl::opt<bool> ScalarizeGlobal(
85d88c1a5aSDimitry Andric "amdgpu-scalarize-global-loads",
86d88c1a5aSDimitry Andric cl::desc("Enable global load scalarization"),
87c4394386SDimitry Andric cl::init(true),
883ca95b02SDimitry Andric cl::Hidden);
893ca95b02SDimitry Andric
907a7e6055SDimitry Andric // Option to run internalize pass.
917a7e6055SDimitry Andric static cl::opt<bool> InternalizeSymbols(
927a7e6055SDimitry Andric "amdgpu-internalize-symbols",
937a7e6055SDimitry Andric cl::desc("Enable elimination of non-kernel functions and unused globals"),
947a7e6055SDimitry Andric cl::init(false),
957a7e6055SDimitry Andric cl::Hidden);
967a7e6055SDimitry Andric
977a7e6055SDimitry Andric // Option to inline all early.
987a7e6055SDimitry Andric static cl::opt<bool> EarlyInlineAll(
997a7e6055SDimitry Andric "amdgpu-early-inline-all",
1007a7e6055SDimitry Andric cl::desc("Inline all functions early"),
1017a7e6055SDimitry Andric cl::init(false),
1027a7e6055SDimitry Andric cl::Hidden);
1037a7e6055SDimitry Andric
1047a7e6055SDimitry Andric static cl::opt<bool> EnableSDWAPeephole(
1057a7e6055SDimitry Andric "amdgpu-sdwa-peephole",
1067a7e6055SDimitry Andric cl::desc("Enable SDWA peepholer"),
1077a7e6055SDimitry Andric cl::init(true));
1087a7e6055SDimitry Andric
109*b5893f02SDimitry Andric static cl::opt<bool> EnableDPPCombine(
110*b5893f02SDimitry Andric "amdgpu-dpp-combine",
111*b5893f02SDimitry Andric cl::desc("Enable DPP combiner"),
112*b5893f02SDimitry Andric cl::init(false));
113*b5893f02SDimitry Andric
1147a7e6055SDimitry Andric // Enable address space based alias analysis
1157a7e6055SDimitry Andric static cl::opt<bool> EnableAMDGPUAliasAnalysis("enable-amdgpu-aa", cl::Hidden,
1167a7e6055SDimitry Andric cl::desc("Enable AMDGPU Alias Analysis"),
1177a7e6055SDimitry Andric cl::init(true));
1187a7e6055SDimitry Andric
1195517e702SDimitry Andric // Option to run late CFG structurizer
1202cab237bSDimitry Andric static cl::opt<bool, true> LateCFGStructurize(
1215517e702SDimitry Andric "amdgpu-late-structurize",
1225517e702SDimitry Andric cl::desc("Enable late CFG structurization"),
1232cab237bSDimitry Andric cl::location(AMDGPUTargetMachine::EnableLateStructurizeCFG),
1242cab237bSDimitry Andric cl::Hidden);
1252cab237bSDimitry Andric
1264ba319b5SDimitry Andric static cl::opt<bool, true> EnableAMDGPUFunctionCalls(
1272cab237bSDimitry Andric "amdgpu-function-calls",
1282cab237bSDimitry Andric cl::desc("Enable AMDGPU function call support"),
1294ba319b5SDimitry Andric cl::location(AMDGPUTargetMachine::EnableFunctionCalls),
1304ba319b5SDimitry Andric cl::init(false),
1314ba319b5SDimitry Andric cl::Hidden);
1322cab237bSDimitry Andric
1332cab237bSDimitry Andric // Enable lib calls simplifications
1342cab237bSDimitry Andric static cl::opt<bool> EnableLibCallSimplify(
1352cab237bSDimitry Andric "amdgpu-simplify-libcall",
1364ba319b5SDimitry Andric cl::desc("Enable amdgpu library simplifications"),
1374ba319b5SDimitry Andric cl::init(true),
1384ba319b5SDimitry Andric cl::Hidden);
1394ba319b5SDimitry Andric
1404ba319b5SDimitry Andric static cl::opt<bool> EnableLowerKernelArguments(
1414ba319b5SDimitry Andric "amdgpu-ir-lower-kernel-arguments",
1424ba319b5SDimitry Andric cl::desc("Lower kernel argument loads in IR pass"),
1432cab237bSDimitry Andric cl::init(true),
1445517e702SDimitry Andric cl::Hidden);
1455517e702SDimitry Andric
146*b5893f02SDimitry Andric // Enable atomic optimization
147*b5893f02SDimitry Andric static cl::opt<bool> EnableAtomicOptimizations(
148*b5893f02SDimitry Andric "amdgpu-atomic-optimizations",
149*b5893f02SDimitry Andric cl::desc("Enable atomic optimizations"),
150*b5893f02SDimitry Andric cl::init(false),
151*b5893f02SDimitry Andric cl::Hidden);
152*b5893f02SDimitry Andric
153*b5893f02SDimitry Andric // Enable Mode register optimization
154*b5893f02SDimitry Andric static cl::opt<bool> EnableSIModeRegisterPass(
155*b5893f02SDimitry Andric "amdgpu-mode-register",
156*b5893f02SDimitry Andric cl::desc("Enable mode register pass"),
157*b5893f02SDimitry Andric cl::init(true),
158*b5893f02SDimitry Andric cl::Hidden);
159*b5893f02SDimitry Andric
LLVMInitializeAMDGPUTarget()1608f0fd8f6SDimitry Andric extern "C" void LLVMInitializeAMDGPUTarget() {
1618f0fd8f6SDimitry Andric // Register the target
162d88c1a5aSDimitry Andric RegisterTargetMachine<R600TargetMachine> X(getTheAMDGPUTarget());
163d88c1a5aSDimitry Andric RegisterTargetMachine<GCNTargetMachine> Y(getTheGCNTarget());
1647d523365SDimitry Andric
1657d523365SDimitry Andric PassRegistry *PR = PassRegistry::getPassRegistry();
1662cab237bSDimitry Andric initializeR600ClauseMergePassPass(*PR);
1672cab237bSDimitry Andric initializeR600ControlFlowFinalizerPass(*PR);
1682cab237bSDimitry Andric initializeR600PacketizerPass(*PR);
1692cab237bSDimitry Andric initializeR600ExpandSpecialInstrsPassPass(*PR);
1702cab237bSDimitry Andric initializeR600VectorRegMergerPass(*PR);
1716ccc06f6SDimitry Andric initializeGlobalISel(*PR);
1722cab237bSDimitry Andric initializeAMDGPUDAGToDAGISelPass(*PR);
173*b5893f02SDimitry Andric initializeGCNDPPCombinePass(*PR);
1747d523365SDimitry Andric initializeSILowerI1CopiesPass(*PR);
1757d523365SDimitry Andric initializeSIFixSGPRCopiesPass(*PR);
1767a7e6055SDimitry Andric initializeSIFixVGPRCopiesPass(*PR);
177*b5893f02SDimitry Andric initializeSIFixupVectorISelPass(*PR);
1787d523365SDimitry Andric initializeSIFoldOperandsPass(*PR);
1797a7e6055SDimitry Andric initializeSIPeepholeSDWAPass(*PR);
1803ca95b02SDimitry Andric initializeSIShrinkInstructionsPass(*PR);
1812cab237bSDimitry Andric initializeSIOptimizeExecMaskingPreRAPass(*PR);
1827d523365SDimitry Andric initializeSILoadStoreOptimizerPass(*PR);
183*b5893f02SDimitry Andric initializeAMDGPUFixFunctionBitcastsPass(*PR);
1846d97bb29SDimitry Andric initializeAMDGPUAlwaysInlinePass(*PR);
1857d523365SDimitry Andric initializeAMDGPUAnnotateKernelFeaturesPass(*PR);
1867d523365SDimitry Andric initializeAMDGPUAnnotateUniformValuesPass(*PR);
1872cab237bSDimitry Andric initializeAMDGPUArgumentUsageInfoPass(*PR);
188*b5893f02SDimitry Andric initializeAMDGPUAtomicOptimizerPass(*PR);
1894ba319b5SDimitry Andric initializeAMDGPULowerKernelArgumentsPass(*PR);
1904ba319b5SDimitry Andric initializeAMDGPULowerKernelAttributesPass(*PR);
1917a7e6055SDimitry Andric initializeAMDGPULowerIntrinsicsPass(*PR);
1922cab237bSDimitry Andric initializeAMDGPUOpenCLEnqueuedBlockLoweringPass(*PR);
1933ca95b02SDimitry Andric initializeAMDGPUPromoteAllocaPass(*PR);
1943ca95b02SDimitry Andric initializeAMDGPUCodeGenPreparePass(*PR);
1952cab237bSDimitry Andric initializeAMDGPURewriteOutArgumentsPass(*PR);
196d88c1a5aSDimitry Andric initializeAMDGPUUnifyMetadataPass(*PR);
1973ca95b02SDimitry Andric initializeSIAnnotateControlFlowPass(*PR);
1987a7e6055SDimitry Andric initializeSIInsertWaitcntsPass(*PR);
199*b5893f02SDimitry Andric initializeSIModeRegisterPass(*PR);
2003ca95b02SDimitry Andric initializeSIWholeQuadModePass(*PR);
2013ca95b02SDimitry Andric initializeSILowerControlFlowPass(*PR);
202d88c1a5aSDimitry Andric initializeSIInsertSkipsPass(*PR);
2032cab237bSDimitry Andric initializeSIMemoryLegalizerPass(*PR);
2043ca95b02SDimitry Andric initializeSIDebuggerInsertNopsPass(*PR);
205d88c1a5aSDimitry Andric initializeSIOptimizeExecMaskingPass(*PR);
2062cab237bSDimitry Andric initializeSIFixWWMLivenessPass(*PR);
2074ba319b5SDimitry Andric initializeSIFormMemoryClausesPass(*PR);
2087a7e6055SDimitry Andric initializeAMDGPUUnifyDivergentExitNodesPass(*PR);
2097a7e6055SDimitry Andric initializeAMDGPUAAWrapperPassPass(*PR);
210*b5893f02SDimitry Andric initializeAMDGPUExternalAAWrapperPass(*PR);
2112cab237bSDimitry Andric initializeAMDGPUUseNativeCallsPass(*PR);
2122cab237bSDimitry Andric initializeAMDGPUSimplifyLibCallsPass(*PR);
2132cab237bSDimitry Andric initializeAMDGPUInlinerPass(*PR);
2147d523365SDimitry Andric }
2157d523365SDimitry Andric
createTLOF(const Triple & TT)2167d523365SDimitry Andric static std::unique_ptr<TargetLoweringObjectFile> createTLOF(const Triple &TT) {
217d88c1a5aSDimitry Andric return llvm::make_unique<AMDGPUTargetObjectFile>();
2188f0fd8f6SDimitry Andric }
2198f0fd8f6SDimitry Andric
createR600MachineScheduler(MachineSchedContext * C)2208f0fd8f6SDimitry Andric static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) {
221d88c1a5aSDimitry Andric return new ScheduleDAGMILive(C, llvm::make_unique<R600SchedStrategy>());
222d88c1a5aSDimitry Andric }
223d88c1a5aSDimitry Andric
createSIMachineScheduler(MachineSchedContext * C)224d88c1a5aSDimitry Andric static ScheduleDAGInstrs *createSIMachineScheduler(MachineSchedContext *C) {
225d88c1a5aSDimitry Andric return new SIScheduleDAGMI(C);
226d88c1a5aSDimitry Andric }
227d88c1a5aSDimitry Andric
228d88c1a5aSDimitry Andric static ScheduleDAGInstrs *
createGCNMaxOccupancyMachineScheduler(MachineSchedContext * C)229d88c1a5aSDimitry Andric createGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
230d88c1a5aSDimitry Andric ScheduleDAGMILive *DAG =
2317a7e6055SDimitry Andric new GCNScheduleDAGMILive(C, make_unique<GCNMaxOccupancySchedStrategy>(C));
232d88c1a5aSDimitry Andric DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
233d88c1a5aSDimitry Andric DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
234c4394386SDimitry Andric DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
235d88c1a5aSDimitry Andric return DAG;
2368f0fd8f6SDimitry Andric }
2378f0fd8f6SDimitry Andric
2387a7e6055SDimitry Andric static ScheduleDAGInstrs *
createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext * C)2397a7e6055SDimitry Andric createIterativeGCNMaxOccupancyMachineScheduler(MachineSchedContext *C) {
2407a7e6055SDimitry Andric auto DAG = new GCNIterativeScheduler(C,
2417a7e6055SDimitry Andric GCNIterativeScheduler::SCHEDULE_LEGACYMAXOCCUPANCY);
2427a7e6055SDimitry Andric DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
2437a7e6055SDimitry Andric DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
2447a7e6055SDimitry Andric return DAG;
2457a7e6055SDimitry Andric }
2467a7e6055SDimitry Andric
createMinRegScheduler(MachineSchedContext * C)2477a7e6055SDimitry Andric static ScheduleDAGInstrs *createMinRegScheduler(MachineSchedContext *C) {
2487a7e6055SDimitry Andric return new GCNIterativeScheduler(C,
2497a7e6055SDimitry Andric GCNIterativeScheduler::SCHEDULE_MINREGFORCED);
2507a7e6055SDimitry Andric }
2517a7e6055SDimitry Andric
2522cab237bSDimitry Andric static ScheduleDAGInstrs *
createIterativeILPMachineScheduler(MachineSchedContext * C)2532cab237bSDimitry Andric createIterativeILPMachineScheduler(MachineSchedContext *C) {
2542cab237bSDimitry Andric auto DAG = new GCNIterativeScheduler(C,
2552cab237bSDimitry Andric GCNIterativeScheduler::SCHEDULE_ILP);
2562cab237bSDimitry Andric DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
2572cab237bSDimitry Andric DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
2582cab237bSDimitry Andric DAG->addMutation(createAMDGPUMacroFusionDAGMutation());
2592cab237bSDimitry Andric return DAG;
2602cab237bSDimitry Andric }
2612cab237bSDimitry Andric
2628f0fd8f6SDimitry Andric static MachineSchedRegistry
263444ed5c5SDimitry Andric R600SchedRegistry("r600", "Run R600's custom scheduler",
2648f0fd8f6SDimitry Andric createR600MachineScheduler);
2658f0fd8f6SDimitry Andric
266444ed5c5SDimitry Andric static MachineSchedRegistry
267444ed5c5SDimitry Andric SISchedRegistry("si", "Run SI's custom scheduler",
268444ed5c5SDimitry Andric createSIMachineScheduler);
269444ed5c5SDimitry Andric
270d88c1a5aSDimitry Andric static MachineSchedRegistry
271d88c1a5aSDimitry Andric GCNMaxOccupancySchedRegistry("gcn-max-occupancy",
272d88c1a5aSDimitry Andric "Run GCN scheduler to maximize occupancy",
273d88c1a5aSDimitry Andric createGCNMaxOccupancyMachineScheduler);
274d88c1a5aSDimitry Andric
2757a7e6055SDimitry Andric static MachineSchedRegistry
2767a7e6055SDimitry Andric IterativeGCNMaxOccupancySchedRegistry("gcn-max-occupancy-experimental",
2777a7e6055SDimitry Andric "Run GCN scheduler to maximize occupancy (experimental)",
2787a7e6055SDimitry Andric createIterativeGCNMaxOccupancyMachineScheduler);
2797a7e6055SDimitry Andric
2807a7e6055SDimitry Andric static MachineSchedRegistry
2817a7e6055SDimitry Andric GCNMinRegSchedRegistry("gcn-minreg",
2827a7e6055SDimitry Andric "Run GCN iterative scheduler for minimal register usage (experimental)",
2837a7e6055SDimitry Andric createMinRegScheduler);
2847a7e6055SDimitry Andric
2852cab237bSDimitry Andric static MachineSchedRegistry
2862cab237bSDimitry Andric GCNILPSchedRegistry("gcn-ilp",
2872cab237bSDimitry Andric "Run GCN iterative scheduler for ILP scheduling (experimental)",
2882cab237bSDimitry Andric createIterativeILPMachineScheduler);
2892cab237bSDimitry Andric
computeDataLayout(const Triple & TT)2903ca95b02SDimitry Andric static StringRef computeDataLayout(const Triple &TT) {
2913ca95b02SDimitry Andric if (TT.getArch() == Triple::r600) {
2923ca95b02SDimitry Andric // 32-bit pointers.
2932cab237bSDimitry Andric return "e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
2944ba319b5SDimitry Andric "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
2958f0fd8f6SDimitry Andric }
2968f0fd8f6SDimitry Andric
2973ca95b02SDimitry Andric // 32-bit private, local, and region pointers. 64-bit global, constant and
2983ca95b02SDimitry Andric // flat.
2994ba319b5SDimitry Andric return "e-p:64:64-p1:64:64-p2:32:32-p3:32:32-p4:64:64-p5:32:32-p6:32:32"
3007a7e6055SDimitry Andric "-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128"
3014ba319b5SDimitry Andric "-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64-S32-A5";
3023ca95b02SDimitry Andric }
3038f0fd8f6SDimitry Andric
3043ca95b02SDimitry Andric LLVM_READNONE
getGPUOrDefault(const Triple & TT,StringRef GPU)3053ca95b02SDimitry Andric static StringRef getGPUOrDefault(const Triple &TT, StringRef GPU) {
3063ca95b02SDimitry Andric if (!GPU.empty())
3073ca95b02SDimitry Andric return GPU;
3083ca95b02SDimitry Andric
3093ca95b02SDimitry Andric if (TT.getArch() == Triple::amdgcn)
3102cab237bSDimitry Andric return "generic";
3113ca95b02SDimitry Andric
3123ca95b02SDimitry Andric return "r600";
3133ca95b02SDimitry Andric }
3143ca95b02SDimitry Andric
getEffectiveRelocModel(Optional<Reloc::Model> RM)3153ca95b02SDimitry Andric static Reloc::Model getEffectiveRelocModel(Optional<Reloc::Model> RM) {
3163ca95b02SDimitry Andric // The AMDGPU toolchain only supports generating shared objects, so we
3173ca95b02SDimitry Andric // must always use PIC.
3183ca95b02SDimitry Andric return Reloc::PIC_;
3198f0fd8f6SDimitry Andric }
3208f0fd8f6SDimitry Andric
AMDGPUTargetMachine(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,TargetOptions Options,Optional<Reloc::Model> RM,Optional<CodeModel::Model> CM,CodeGenOpt::Level OptLevel)3218f0fd8f6SDimitry Andric AMDGPUTargetMachine::AMDGPUTargetMachine(const Target &T, const Triple &TT,
3228f0fd8f6SDimitry Andric StringRef CPU, StringRef FS,
3233ca95b02SDimitry Andric TargetOptions Options,
3243ca95b02SDimitry Andric Optional<Reloc::Model> RM,
3252cab237bSDimitry Andric Optional<CodeModel::Model> CM,
3268f0fd8f6SDimitry Andric CodeGenOpt::Level OptLevel)
3273ca95b02SDimitry Andric : LLVMTargetMachine(T, computeDataLayout(TT), TT, getGPUOrDefault(TT, CPU),
3282cab237bSDimitry Andric FS, Options, getEffectiveRelocModel(RM),
329*b5893f02SDimitry Andric getEffectiveCodeModel(CM, CodeModel::Small), OptLevel),
330d88c1a5aSDimitry Andric TLOF(createTLOF(getTargetTriple())) {
3318f0fd8f6SDimitry Andric initAsmInfo();
3328f0fd8f6SDimitry Andric }
3338f0fd8f6SDimitry Andric
3342cab237bSDimitry Andric bool AMDGPUTargetMachine::EnableLateStructurizeCFG = false;
3354ba319b5SDimitry Andric bool AMDGPUTargetMachine::EnableFunctionCalls = false;
3364ba319b5SDimitry Andric
3374ba319b5SDimitry Andric AMDGPUTargetMachine::~AMDGPUTargetMachine() = default;
3382cab237bSDimitry Andric
getGPUName(const Function & F) const3393ca95b02SDimitry Andric StringRef AMDGPUTargetMachine::getGPUName(const Function &F) const {
3403ca95b02SDimitry Andric Attribute GPUAttr = F.getFnAttribute("target-cpu");
3413ca95b02SDimitry Andric return GPUAttr.hasAttribute(Attribute::None) ?
3423ca95b02SDimitry Andric getTargetCPU() : GPUAttr.getValueAsString();
3433ca95b02SDimitry Andric }
3443ca95b02SDimitry Andric
getFeatureString(const Function & F) const3453ca95b02SDimitry Andric StringRef AMDGPUTargetMachine::getFeatureString(const Function &F) const {
3463ca95b02SDimitry Andric Attribute FSAttr = F.getFnAttribute("target-features");
3473ca95b02SDimitry Andric
3483ca95b02SDimitry Andric return FSAttr.hasAttribute(Attribute::None) ?
3493ca95b02SDimitry Andric getTargetFeatureString() :
3503ca95b02SDimitry Andric FSAttr.getValueAsString();
3513ca95b02SDimitry Andric }
3523ca95b02SDimitry Andric
3532cab237bSDimitry Andric /// Predicate for Internalize pass.
mustPreserveGV(const GlobalValue & GV)3542cab237bSDimitry Andric static bool mustPreserveGV(const GlobalValue &GV) {
3552cab237bSDimitry Andric if (const Function *F = dyn_cast<Function>(&GV))
3562cab237bSDimitry Andric return F->isDeclaration() || AMDGPU::isEntryFunctionCC(F->getCallingConv());
3572cab237bSDimitry Andric
3582cab237bSDimitry Andric return !GV.use_empty();
3592cab237bSDimitry Andric }
3602cab237bSDimitry Andric
adjustPassManager(PassManagerBuilder & Builder)3617a7e6055SDimitry Andric void AMDGPUTargetMachine::adjustPassManager(PassManagerBuilder &Builder) {
3627a7e6055SDimitry Andric Builder.DivergentTarget = true;
3637a7e6055SDimitry Andric
3642cab237bSDimitry Andric bool EnableOpt = getOptLevel() > CodeGenOpt::None;
3652cab237bSDimitry Andric bool Internalize = InternalizeSymbols;
3662cab237bSDimitry Andric bool EarlyInline = EarlyInlineAll && EnableOpt && !EnableAMDGPUFunctionCalls;
3672cab237bSDimitry Andric bool AMDGPUAA = EnableAMDGPUAliasAnalysis && EnableOpt;
3682cab237bSDimitry Andric bool LibCallSimplify = EnableLibCallSimplify && EnableOpt;
3692cab237bSDimitry Andric
3702cab237bSDimitry Andric if (EnableAMDGPUFunctionCalls) {
3712cab237bSDimitry Andric delete Builder.Inliner;
3722cab237bSDimitry Andric Builder.Inliner = createAMDGPUFunctionInliningPass();
3732cab237bSDimitry Andric }
3742cab237bSDimitry Andric
3757a7e6055SDimitry Andric Builder.addExtension(
3767a7e6055SDimitry Andric PassManagerBuilder::EP_ModuleOptimizerEarly,
3777a7e6055SDimitry Andric [Internalize, EarlyInline, AMDGPUAA](const PassManagerBuilder &,
3787a7e6055SDimitry Andric legacy::PassManagerBase &PM) {
3797a7e6055SDimitry Andric if (AMDGPUAA) {
3807a7e6055SDimitry Andric PM.add(createAMDGPUAAWrapperPass());
3817a7e6055SDimitry Andric PM.add(createAMDGPUExternalAAWrapperPass());
3827a7e6055SDimitry Andric }
383d88c1a5aSDimitry Andric PM.add(createAMDGPUUnifyMetadataPass());
3847a7e6055SDimitry Andric if (Internalize) {
3852cab237bSDimitry Andric PM.add(createInternalizePass(mustPreserveGV));
3867a7e6055SDimitry Andric PM.add(createGlobalDCEPass());
3877a7e6055SDimitry Andric }
3887a7e6055SDimitry Andric if (EarlyInline)
3897a7e6055SDimitry Andric PM.add(createAMDGPUAlwaysInlinePass(false));
3907a7e6055SDimitry Andric });
3917a7e6055SDimitry Andric
3922cab237bSDimitry Andric const auto &Opt = Options;
3937a7e6055SDimitry Andric Builder.addExtension(
3947a7e6055SDimitry Andric PassManagerBuilder::EP_EarlyAsPossible,
3952cab237bSDimitry Andric [AMDGPUAA, LibCallSimplify, &Opt](const PassManagerBuilder &,
3962cab237bSDimitry Andric legacy::PassManagerBase &PM) {
3977a7e6055SDimitry Andric if (AMDGPUAA) {
3987a7e6055SDimitry Andric PM.add(createAMDGPUAAWrapperPass());
3997a7e6055SDimitry Andric PM.add(createAMDGPUExternalAAWrapperPass());
4007a7e6055SDimitry Andric }
4012cab237bSDimitry Andric PM.add(llvm::createAMDGPUUseNativeCallsPass());
4022cab237bSDimitry Andric if (LibCallSimplify)
4032cab237bSDimitry Andric PM.add(llvm::createAMDGPUSimplifyLibCallsPass(Opt));
4047a7e6055SDimitry Andric });
405edd7eaddSDimitry Andric
406edd7eaddSDimitry Andric Builder.addExtension(
407edd7eaddSDimitry Andric PassManagerBuilder::EP_CGSCCOptimizerLate,
408edd7eaddSDimitry Andric [](const PassManagerBuilder &, legacy::PassManagerBase &PM) {
409edd7eaddSDimitry Andric // Add infer address spaces pass to the opt pipeline after inlining
410edd7eaddSDimitry Andric // but before SROA to increase SROA opportunities.
411edd7eaddSDimitry Andric PM.add(createInferAddressSpacesPass());
4124ba319b5SDimitry Andric
4134ba319b5SDimitry Andric // This should run after inlining to have any chance of doing anything,
4144ba319b5SDimitry Andric // and before other cleanup optimizations.
4154ba319b5SDimitry Andric PM.add(createAMDGPULowerKernelAttributesPass());
416edd7eaddSDimitry Andric });
417d88c1a5aSDimitry Andric }
418d88c1a5aSDimitry Andric
4198f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
4208f0fd8f6SDimitry Andric // R600 Target Machine (R600 -> Cayman)
4218f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
4228f0fd8f6SDimitry Andric
R600TargetMachine(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,TargetOptions Options,Optional<Reloc::Model> RM,Optional<CodeModel::Model> CM,CodeGenOpt::Level OL,bool JIT)4238f0fd8f6SDimitry Andric R600TargetMachine::R600TargetMachine(const Target &T, const Triple &TT,
4243ca95b02SDimitry Andric StringRef CPU, StringRef FS,
4253ca95b02SDimitry Andric TargetOptions Options,
4263ca95b02SDimitry Andric Optional<Reloc::Model> RM,
4272cab237bSDimitry Andric Optional<CodeModel::Model> CM,
4282cab237bSDimitry Andric CodeGenOpt::Level OL, bool JIT)
429d88c1a5aSDimitry Andric : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {
430d88c1a5aSDimitry Andric setRequiresStructuredCFG(true);
431d88c1a5aSDimitry Andric }
4323ca95b02SDimitry Andric
getSubtargetImpl(const Function & F) const4333ca95b02SDimitry Andric const R600Subtarget *R600TargetMachine::getSubtargetImpl(
4343ca95b02SDimitry Andric const Function &F) const {
4353ca95b02SDimitry Andric StringRef GPU = getGPUName(F);
4363ca95b02SDimitry Andric StringRef FS = getFeatureString(F);
4373ca95b02SDimitry Andric
4383ca95b02SDimitry Andric SmallString<128> SubtargetKey(GPU);
4393ca95b02SDimitry Andric SubtargetKey.append(FS);
4403ca95b02SDimitry Andric
4413ca95b02SDimitry Andric auto &I = SubtargetMap[SubtargetKey];
4423ca95b02SDimitry Andric if (!I) {
4433ca95b02SDimitry Andric // This needs to be done before we create a new subtarget since any
4443ca95b02SDimitry Andric // creation will depend on the TM and the code generation flags on the
4453ca95b02SDimitry Andric // function that reside in TargetOptions.
4463ca95b02SDimitry Andric resetTargetOptions(F);
4473ca95b02SDimitry Andric I = llvm::make_unique<R600Subtarget>(TargetTriple, GPU, FS, *this);
4483ca95b02SDimitry Andric }
4493ca95b02SDimitry Andric
4503ca95b02SDimitry Andric return I.get();
4513ca95b02SDimitry Andric }
4528f0fd8f6SDimitry Andric
4534ba319b5SDimitry Andric TargetTransformInfo
getTargetTransformInfo(const Function & F)4544ba319b5SDimitry Andric R600TargetMachine::getTargetTransformInfo(const Function &F) {
4554ba319b5SDimitry Andric return TargetTransformInfo(R600TTIImpl(this, F));
4564ba319b5SDimitry Andric }
4574ba319b5SDimitry Andric
4588f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
4598f0fd8f6SDimitry Andric // GCN Target Machine (SI+)
4608f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
4618f0fd8f6SDimitry Andric
GCNTargetMachine(const Target & T,const Triple & TT,StringRef CPU,StringRef FS,TargetOptions Options,Optional<Reloc::Model> RM,Optional<CodeModel::Model> CM,CodeGenOpt::Level OL,bool JIT)4628f0fd8f6SDimitry Andric GCNTargetMachine::GCNTargetMachine(const Target &T, const Triple &TT,
4633ca95b02SDimitry Andric StringRef CPU, StringRef FS,
4643ca95b02SDimitry Andric TargetOptions Options,
4653ca95b02SDimitry Andric Optional<Reloc::Model> RM,
4662cab237bSDimitry Andric Optional<CodeModel::Model> CM,
4672cab237bSDimitry Andric CodeGenOpt::Level OL, bool JIT)
4683ca95b02SDimitry Andric : AMDGPUTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL) {}
4693ca95b02SDimitry Andric
getSubtargetImpl(const Function & F) const4704ba319b5SDimitry Andric const GCNSubtarget *GCNTargetMachine::getSubtargetImpl(const Function &F) const {
4713ca95b02SDimitry Andric StringRef GPU = getGPUName(F);
4723ca95b02SDimitry Andric StringRef FS = getFeatureString(F);
4733ca95b02SDimitry Andric
4743ca95b02SDimitry Andric SmallString<128> SubtargetKey(GPU);
4753ca95b02SDimitry Andric SubtargetKey.append(FS);
4763ca95b02SDimitry Andric
4773ca95b02SDimitry Andric auto &I = SubtargetMap[SubtargetKey];
4783ca95b02SDimitry Andric if (!I) {
4793ca95b02SDimitry Andric // This needs to be done before we create a new subtarget since any
4803ca95b02SDimitry Andric // creation will depend on the TM and the code generation flags on the
4813ca95b02SDimitry Andric // function that reside in TargetOptions.
4823ca95b02SDimitry Andric resetTargetOptions(F);
4834ba319b5SDimitry Andric I = llvm::make_unique<GCNSubtarget>(TargetTriple, GPU, FS, *this);
4843ca95b02SDimitry Andric }
4853ca95b02SDimitry Andric
486d88c1a5aSDimitry Andric I->setScalarizeGlobalBehavior(ScalarizeGlobal);
487d88c1a5aSDimitry Andric
4883ca95b02SDimitry Andric return I.get();
4893ca95b02SDimitry Andric }
4908f0fd8f6SDimitry Andric
4914ba319b5SDimitry Andric TargetTransformInfo
getTargetTransformInfo(const Function & F)4924ba319b5SDimitry Andric GCNTargetMachine::getTargetTransformInfo(const Function &F) {
4934ba319b5SDimitry Andric return TargetTransformInfo(GCNTTIImpl(this, F));
4944ba319b5SDimitry Andric }
4954ba319b5SDimitry Andric
4968f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
4978f0fd8f6SDimitry Andric // AMDGPU Pass Setup
4988f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
4998f0fd8f6SDimitry Andric
5008f0fd8f6SDimitry Andric namespace {
5013ca95b02SDimitry Andric
/// Pass pipeline configuration shared by the R600 and GCN subtargets.
/// Subclasses (R600PassConfig, GCNPassConfig) refine scheduling and the
/// selection/emission stages.
class AMDGPUPassConfig : public TargetPassConfig {
public:
  AMDGPUPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {
    // Exceptions and StackMaps are not supported, so these passes will never do
    // anything.
    disablePass(&StackMapLivenessID);
    disablePass(&FuncletLayoutID);
  }

  /// Convenience accessor returning the owning TM downcast to
  /// AMDGPUTargetMachine.
  AMDGPUTargetMachine &getAMDGPUTargetMachine() const {
    return getTM<AMDGPUTargetMachine>();
  }

  /// Default machine scheduler: generic live-interval scheduler augmented
  /// with load/store clustering mutations so adjacent memory ops are kept
  /// together.
  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override {
    ScheduleDAGMILive *DAG = createGenericSchedLive(C);
    DAG->addMutation(createLoadClusterDAGMutation(DAG->TII, DAG->TRI));
    DAG->addMutation(createStoreClusterDAGMutation(DAG->TII, DAG->TRI));
    return DAG;
  }

  /// Adds GVN at aggressive opt levels, EarlyCSE otherwise.
  void addEarlyCSEOrGVNPass();
  /// Scalar straight-line optimizations (SeparateConstOffsetFromGEP, SLSR,
  /// NaryReassociate) plus the CSE/GVN cleanup they require.
  void addStraightLineScalarOptimizationPasses();
  void addIRPasses() override;
  void addCodeGenPrepare() override;
  bool addPreISel() override;
  bool addInstSelector() override;
  bool addGCPasses() override;
};
5328f0fd8f6SDimitry Andric
/// Pass pipeline configuration for the R600 family of GPUs.
class R600PassConfig final : public AMDGPUPassConfig {
public:
  R600PassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) {}

  /// R600 uses its own machine scheduler instead of the generic one.
  ScheduleDAGInstrs *createMachineScheduler(
    MachineSchedContext *C) const override {
    return createR600MachineScheduler(C);
  }

  bool addPreISel() override;
  bool addInstSelector() override;
  void addPreRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};
5498f0fd8f6SDimitry Andric
/// Pass pipeline configuration for GCN (Southern Islands and later) GPUs.
/// Also implements the GlobalISel hooks for this target.
class GCNPassConfig final : public AMDGPUPassConfig {
public:
  GCNPassConfig(LLVMTargetMachine &TM, PassManagerBase &PM)
    : AMDGPUPassConfig(TM, PM) {
    // It is necessary to know the register usage of the entire call graph. We
    // allow calls without EnableAMDGPUFunctionCalls if they are marked
    // noinline, so this is always required.
    setRequiresCodeGenSCCOrder(true);
  }

  /// Convenience accessor returning the owning TM downcast to
  /// GCNTargetMachine.
  GCNTargetMachine &getGCNTargetMachine() const {
    return getTM<GCNTargetMachine>();
  }

  /// Selects between the SI scheduler and the max-occupancy scheduler; see
  /// the out-of-line definition.
  ScheduleDAGInstrs *
  createMachineScheduler(MachineSchedContext *C) const override;

  bool addPreISel() override;
  void addMachineSSAOptimization() override;
  bool addILPOpts() override;
  bool addInstSelector() override;
  // GlobalISel pipeline hooks.
  bool addIRTranslator() override;
  bool addLegalizeMachineIR() override;
  bool addRegBankSelect() override;
  bool addGlobalInstructionSelect() override;
  void addFastRegAlloc(FunctionPass *RegAllocPass) override;
  void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
  void addPreRegAlloc() override;
  void addPostRegAlloc() override;
  void addPreSched2() override;
  void addPreEmitPass() override;
};
5828f0fd8f6SDimitry Andric
583d88c1a5aSDimitry Andric } // end anonymous namespace
5848f0fd8f6SDimitry Andric
addEarlyCSEOrGVNPass()5853ca95b02SDimitry Andric void AMDGPUPassConfig::addEarlyCSEOrGVNPass() {
5863ca95b02SDimitry Andric if (getOptLevel() == CodeGenOpt::Aggressive)
5873ca95b02SDimitry Andric addPass(createGVNPass());
5883ca95b02SDimitry Andric else
5893ca95b02SDimitry Andric addPass(createEarlyCSEPass());
5903ca95b02SDimitry Andric }
5913ca95b02SDimitry Andric
/// Straight-line scalar optimizations ported from the NVPTX/target-GPU
/// playbook: separate constant GEP offsets, speculate, strength-reduce,
/// then clean up the common subexpressions these passes expose.
void AMDGPUPassConfig::addStraightLineScalarOptimizationPasses() {
  addPass(createLICMPass());
  addPass(createSeparateConstOffsetFromGEPPass());
  addPass(createSpeculativeExecutionPass());
  // ReassociateGEPs exposes more opportunities for SLSR. See
  // the example in reassociate-geps-and-slsr.ll.
  addPass(createStraightLineStrengthReducePass());
  // SeparateConstOffsetFromGEP and SLSR creates common expressions which GVN or
  // EarlyCSE can reuse.
  addEarlyCSEOrGVNPass();
  // Run NaryReassociate after EarlyCSE/GVN to be more effective.
  addPass(createNaryReassociatePass());
  // NaryReassociate on GEPs creates redundant common expressions, so run
  // EarlyCSE after it.
  addPass(createEarlyCSEPass());
}
6083ca95b02SDimitry Andric
/// IR-level pipeline shared by R600 and GCN. Pass order here is
/// deliberate; see the inline comments before reordering anything.
void AMDGPUPassConfig::addIRPasses() {
  const AMDGPUTargetMachine &TM = getAMDGPUTargetMachine();

  // There is no reason to run these.
  // NOTE(review): StackMapLiveness and FuncletLayout are already disabled in
  // the AMDGPUPassConfig constructor; only PatchableFunction is new here.
  disablePass(&StackMapLivenessID);
  disablePass(&FuncletLayoutID);
  disablePass(&PatchableFunctionID);

  addPass(createAtomicExpandPass());

  // This must occur before inlining, as the inliner will not look through
  // bitcast calls.
  addPass(createAMDGPUFixFunctionBitcastsPass());

  addPass(createAMDGPULowerIntrinsicsPass());

  // Function calls are not supported, so make sure we inline everything.
  addPass(createAMDGPUAlwaysInlinePass());
  addPass(createAlwaysInlinerLegacyPass());
  // We need to add the barrier noop pass, otherwise adding the function
  // inlining pass will cause all of the PassConfig's passes to be run
  // one function at a time, which means if we have a module with two
  // functions, then we will generate code for the first function
  // without ever running any passes on the second.
  addPass(createBarrierNoopPass());

  if (TM.getTargetTriple().getArch() == Triple::amdgcn) {
    // TODO: May want to move later or split into an early and late one.

    addPass(createAMDGPUCodeGenPreparePass());
  }

  // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments.
  if (TM.getTargetTriple().getArch() == Triple::r600)
    addPass(createR600OpenCLImageTypeLoweringPass());

  // Replace OpenCL enqueued block function pointers with global variables.
  addPass(createAMDGPUOpenCLEnqueuedBlockLoweringPass());

  if (TM.getOptLevel() > CodeGenOpt::None) {
    addPass(createInferAddressSpacesPass());
    addPass(createAMDGPUPromoteAlloca());

    if (EnableSROA)
      addPass(createSROAPass());

    addStraightLineScalarOptimizationPasses();

    if (EnableAMDGPUAliasAnalysis) {
      addPass(createAMDGPUAAWrapperPass());
      // Bridge the AMDGPU alias analysis into the external-AA framework so
      // generic passes can query it when the wrapper pass is available.
      addPass(createExternalAAWrapperPass([](Pass &P, Function &,
                                             AAResults &AAR) {
        if (auto *WrapperPass = P.getAnalysisIfAvailable<AMDGPUAAWrapperPass>())
          AAR.addAAResult(WrapperPass->getResult());
      }));
    }
  }

  TargetPassConfig::addIRPasses();

  // EarlyCSE is not always strong enough to clean up what LSR produces. For
  // example, GVN can combine
  //
  //   %0 = add %a, %b
  //   %1 = add %b, %a
  //
  // and
  //
  //   %0 = shl nsw %a, 2
  //   %1 = shl %a, 2
  //
  // but EarlyCSE can do neither of them.
  if (getOptLevel() != CodeGenOpt::None)
    addEarlyCSEOrGVNPass();
}
6848f0fd8f6SDimitry Andric
addCodeGenPrepare()6858f0fd8f6SDimitry Andric void AMDGPUPassConfig::addCodeGenPrepare() {
6864ba319b5SDimitry Andric if (TM->getTargetTriple().getArch() == Triple::amdgcn &&
6874ba319b5SDimitry Andric EnableLowerKernelArguments)
6884ba319b5SDimitry Andric addPass(createAMDGPULowerKernelArgumentsPass());
6894ba319b5SDimitry Andric
6908f0fd8f6SDimitry Andric TargetPassConfig::addCodeGenPrepare();
6913ca95b02SDimitry Andric
6923ca95b02SDimitry Andric if (EnableLoadStoreVectorizer)
6933ca95b02SDimitry Andric addPass(createLoadStoreVectorizerPass());
6948f0fd8f6SDimitry Andric }
6958f0fd8f6SDimitry Andric
/// Common pre-ISel IR lowering: remove switch instructions and flatten the
/// CFG before the subtarget-specific structurization runs.
bool AMDGPUPassConfig::addPreISel() {
  addPass(createLowerSwitchPass());
  addPass(createFlattenCFGPass());
  return false;
}
7018f0fd8f6SDimitry Andric
/// Install the AMDGPU SelectionDAG instruction selector.
bool AMDGPUPassConfig::addInstSelector() {
  addPass(createAMDGPUISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
  return false;
}
7068f0fd8f6SDimitry Andric
/// Suppress the default garbage-collection lowering passes.
bool AMDGPUPassConfig::addGCPasses() {
  // Do nothing. GC is not supported.
  return false;
}
7117d523365SDimitry Andric
7128f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
7138f0fd8f6SDimitry Andric // R600 Pass Setup
7148f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
7158f0fd8f6SDimitry Andric
/// R600 pre-ISel: common AMDGPU lowering plus optional CFG structurization
/// (gated by the -r600-ir-structurize cl::opt).
bool R600PassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  if (EnableR600StructurizeCFG)
    addPass(createStructurizeCFGPass());
  return false;
}
7238f0fd8f6SDimitry Andric
/// Install the R600 SelectionDAG instruction selector.
bool R600PassConfig::addInstSelector() {
  addPass(createR600ISelDag(&getAMDGPUTargetMachine(), getOptLevel()));
  return false;
}
7282cab237bSDimitry Andric
/// Before register allocation, merge R600 vector registers.
void R600PassConfig::addPreRegAlloc() {
  addPass(createR600VectorRegMerger());
}
7328f0fd8f6SDimitry Andric
/// Post-RA, pre-sched2 R600 passes: clause marking, optional if-conversion,
/// and clause merging. The second `false` argument skips the machine
/// verifier after each pass.
void R600PassConfig::addPreSched2() {
  addPass(createR600EmitClauseMarkers(), false);
  if (EnableR600IfConvert)
    addPass(&IfConverterID, false);
  addPass(createR600ClauseMergePass(), false);
}
7398f0fd8f6SDimitry Andric
/// Final R600 lowering before emission: CFG structurization, special-instr
/// expansion, bundle finalization, VLIW packetizing, and control-flow
/// finalization — in that order.
void R600PassConfig::addPreEmitPass() {
  addPass(createAMDGPUCFGStructurizerPass(), false);
  addPass(createR600ExpandSpecialInstrsPass(), false);
  addPass(&FinalizeMachineBundlesID, false);
  addPass(createR600Packetizer(), false);
  addPass(createR600ControlFlowFinalizer(), false);
}
7478f0fd8f6SDimitry Andric
createPassConfig(PassManagerBase & PM)7488f0fd8f6SDimitry Andric TargetPassConfig *R600TargetMachine::createPassConfig(PassManagerBase &PM) {
749f9448bf3SDimitry Andric return new R600PassConfig(*this, PM);
7508f0fd8f6SDimitry Andric }
7518f0fd8f6SDimitry Andric
7528f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
7538f0fd8f6SDimitry Andric // GCN Pass Setup
7548f0fd8f6SDimitry Andric //===----------------------------------------------------------------------===//
7558f0fd8f6SDimitry Andric
createMachineScheduler(MachineSchedContext * C) const7563ca95b02SDimitry Andric ScheduleDAGInstrs *GCNPassConfig::createMachineScheduler(
7573ca95b02SDimitry Andric MachineSchedContext *C) const {
7584ba319b5SDimitry Andric const GCNSubtarget &ST = C->MF->getSubtarget<GCNSubtarget>();
7593ca95b02SDimitry Andric if (ST.enableSIScheduler())
7603ca95b02SDimitry Andric return createSIMachineScheduler(C);
761d88c1a5aSDimitry Andric return createGCNMaxOccupancyMachineScheduler(C);
7623ca95b02SDimitry Andric }
7633ca95b02SDimitry Andric
/// GCN pre-ISel IR lowering: atomic optimization, kernel-feature
/// annotation, divergent-exit unification, CFG structurization (unless
/// deferred to the machine level), sinking, and uniformity annotation.
bool GCNPassConfig::addPreISel() {
  AMDGPUPassConfig::addPreISel();

  if (EnableAtomicOptimizations) {
    addPass(createAMDGPUAtomicOptimizerPass());
  }

  // FIXME: We need to run a pass to propagate the attributes when calls are
  // supported.
  addPass(createAMDGPUAnnotateKernelFeaturesPass());

  // Merge divergent exit nodes. StructurizeCFG won't recognize the multi-exit
  // regions formed by them.
  addPass(&AMDGPUUnifyDivergentExitNodesID);
  if (!LateCFGStructurize) {
    addPass(createStructurizeCFGPass(true)); // true -> SkipUniformRegions
  }
  addPass(createSinkingPass());
  addPass(createAMDGPUAnnotateUniformValues());
  if (!LateCFGStructurize) {
    addPass(createSIAnnotateControlFlowPass());
  }

  return false;
}
7898f0fd8f6SDimitry Andric
/// Machine-SSA optimizations for GCN: generic passes first, then the
/// SI-specific operand folding / DCE / load-store-optimizer / SDWA
/// sequence. The ordering here matters; see the comment below.
void GCNPassConfig::addMachineSSAOptimization() {
  TargetPassConfig::addMachineSSAOptimization();

  // We want to fold operands after PeepholeOptimizer has run (or as part of
  // it), because it will eliminate extra copies making it easier to fold the
  // real source operand. We want to eliminate dead instructions after, so that
  // we see fewer uses of the copies. We then need to clean up the dead
  // instructions leftover after the operands are folded as well.
  //
  // XXX - Can we get away without running DeadMachineInstructionElim again?
  addPass(&SIFoldOperandsID);
  if (EnableDPPCombine)
    addPass(&GCNDPPCombineID);
  addPass(&DeadMachineInstructionElimID);
  addPass(&SILoadStoreOptimizerID);
  if (EnableSDWAPeephole) {
    // SDWA peephole creates new opportunities for LICM/CSE/folding, so
    // rerun those cleanups afterwards.
    addPass(&SIPeepholeSDWAID);
    addPass(&EarlyMachineLICMID);
    addPass(&MachineCSEID);
    addPass(&SIFoldOperandsID);
    addPass(&DeadMachineInstructionElimID);
  }
  addPass(createSIShrinkInstructionsPass());
}
8143ca95b02SDimitry Andric
/// ILP optimizations: optional early if-conversion (gated by its cl::opt)
/// ahead of the generic ILP passes.
bool GCNPassConfig::addILPOpts() {
  if (EnableEarlyIfConversion)
    addPass(&EarlyIfConverterID);

  TargetPassConfig::addILPOpts();
  return false;
}
8226c4bc1bdSDimitry Andric
/// GCN instruction selection: common AMDGPU DAG ISel followed by fixups for
/// SGPR copies, i1 copies, vector-ISel artifacts, and IMG initialization.
bool GCNPassConfig::addInstSelector() {
  AMDGPUPassConfig::addInstSelector();
  addPass(&SIFixSGPRCopiesID);
  addPass(createSILowerI1CopiesPass());
  addPass(createSIFixupVectorISelPass());
  addPass(createSIAddIMGInitPass());
  return false;
}
8318f0fd8f6SDimitry Andric
/// GlobalISel stage 1: translate LLVM IR into generic MIR.
bool GCNPassConfig::addIRTranslator() {
  addPass(new IRTranslator());
  return false;
}
8368f0fd8f6SDimitry Andric
/// GlobalISel stage 2: legalize generic MIR for this target.
bool GCNPassConfig::addLegalizeMachineIR() {
  addPass(new Legalizer());
  return false;
}
841d88c1a5aSDimitry Andric
/// GlobalISel stage 3: assign virtual registers to register banks.
bool GCNPassConfig::addRegBankSelect() {
  addPass(new RegBankSelect());
  return false;
}
846d88c1a5aSDimitry Andric
/// GlobalISel stage 4: select target instructions from generic MIR.
bool GCNPassConfig::addGlobalInstructionSelect() {
  addPass(new InstructionSelect());
  return false;
}
8517a7e6055SDimitry Andric
/// Before register allocation: run the machine-level CFG structurizer if
/// structurization was deferred past ISel, then whole-quad-mode insertion.
void GCNPassConfig::addPreRegAlloc() {
  if (LateCFGStructurize) {
    addPass(createAMDGPUMachineCFGStructurizerPass());
  }
  addPass(createSIWholeQuadModePass());
}
8587d523365SDimitry Andric
/// Fast (-O0) register allocation pipeline. Inserts the SI control-flow
/// lowering passes at precise points relative to PHI elimination; the
/// insertion order is load-bearing — see comments.
void GCNPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  // FIXME: We have to disable the verifier here because of PHIElimination +
  // TwoAddressInstructions disabling it.

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  // This must be run after SILowerControlFlow, since it needs to use the
  // machine-level CFG, but before register allocation.
  insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);

  TargetPassConfig::addFastRegAlloc(RegAllocPass);
}
8747d523365SDimitry Andric
/// Optimized register allocation pipeline. Mirrors addFastRegAlloc's
/// control-flow-lowering insertions and additionally schedules exec-mask
/// optimization and memory-clause formation after the machine scheduler.
void GCNPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  insertPass(&MachineSchedulerID, &SIOptimizeExecMaskingPreRAID);

  insertPass(&SIOptimizeExecMaskingPreRAID, &SIFormMemoryClausesID);

  // This must be run immediately after phi elimination and before
  // TwoAddressInstructions, otherwise the processing of the tied operand of
  // SI_ELSE will introduce a copy of the tied operand source after the else.
  insertPass(&PHIEliminationID, &SILowerControlFlowID, false);

  // This must be run after SILowerControlFlow, since it needs to use the
  // machine-level CFG, but before register allocation.
  insertPass(&SILowerControlFlowID, &SIFixWWMLivenessID, false);

  TargetPassConfig::addOptimizedRegAlloc(RegAllocPass);
}
8918f0fd8f6SDimitry Andric
/// After register allocation: fix illegal VGPR copies, and (above -O0)
/// optimize exec-mask manipulation, then the generic post-RA passes.
void GCNPassConfig::addPostRegAlloc() {
  addPass(&SIFixVGPRCopiesID);
  if (getOptLevel() > CodeGenOpt::None)
    addPass(&SIOptimizeExecMaskingID);
  TargetPassConfig::addPostRegAlloc();
}
898d88c1a5aSDimitry Andric
/// Intentionally empty: GCN adds no passes at the pre-sched2 point, and
/// overriding suppresses any default behavior.
void GCNPassConfig::addPreSched2() {
}
9018f0fd8f6SDimitry Andric
/// Final GCN lowering before emission: memory legalization, waitcnt
/// insertion, instruction shrinking, mode-register setup, stand-alone
/// hazard handling, skip insertion, debugger nops, and branch relaxation.
void GCNPassConfig::addPreEmitPass() {
  addPass(createSIMemoryLegalizerPass());
  addPass(createSIInsertWaitcntsPass());
  addPass(createSIShrinkInstructionsPass());
  addPass(createSIModeRegisterPass());

  // The hazard recognizer that runs as part of the post-ra scheduler does not
  // guarantee to be able handle all hazards correctly. This is because if there
  // are multiple scheduling regions in a basic block, the regions are scheduled
  // bottom up, so when we begin to schedule a region we don't know what
  // instructions were emitted directly before it.
  //
  // Here we add a stand-alone hazard recognizer pass which can handle all
  // cases.
  //
  // FIXME: This stand-alone pass will emit indiv. S_NOP 0, as needed. It would
  // be better for it to emit S_NOP <N> when possible.
  addPass(&PostRAHazardRecognizerID);

  addPass(&SIInsertSkipsPassID);
  addPass(createSIDebuggerInsertNopsPass());
  addPass(&BranchRelaxationPassID);
}
9258f0fd8f6SDimitry Andric
createPassConfig(PassManagerBase & PM)9268f0fd8f6SDimitry Andric TargetPassConfig *GCNTargetMachine::createPassConfig(PassManagerBase &PM) {
927f9448bf3SDimitry Andric return new GCNPassConfig(*this, PM);
9288f0fd8f6SDimitry Andric }
929