1 //===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering ------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file This file contains a DAG scheduling mutation to cluster shader
10 ///       exports.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPUExportClustering.h"
15 #include "AMDGPUSubtarget.h"
16 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17 #include "SIInstrInfo.h"
18 
19 using namespace llvm;
20 
21 namespace {
22 
23 class ExportClustering : public ScheduleDAGMutation {
24 public:
25   ExportClustering() {}
26   void apply(ScheduleDAGInstrs *DAG) override;
27 };
28 
29 static bool isExport(const SUnit &SU) {
30   const MachineInstr *MI = SU.getInstr();
31   return MI->getOpcode() == AMDGPU::EXP ||
32          MI->getOpcode() == AMDGPU::EXP_DONE;
33 }
34 
35 static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
36   const MachineInstr *MI = SU->getInstr();
37   int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
38   return Imm >= 12 && Imm <= 15;
39 }
40 
41 static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
42                       unsigned PosCount) {
43   if (!PosCount || PosCount == Chain.size())
44     return;
45 
46   // Position exports should occur as soon as possible in the shader
47   // for optimal performance.  This moves position exports before
48   // other exports while preserving the order within different export
49   // types (pos or other).
50   SmallVector<SUnit *, 8> Copy(Chain);
51   unsigned PosIdx = 0;
52   unsigned OtherIdx = PosCount;
53   for (SUnit *SU : Copy) {
54     if (isPositionExport(TII, SU))
55       Chain[PosIdx++] = SU;
56     else
57       Chain[OtherIdx++] = SU;
58   }
59 }
60 
61 static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
62   SUnit *ChainHead = Exports.front();
63 
64   // Now construct cluster from chain by adding new edges.
65   for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
66     SUnit *SUa = Exports[Idx];
67     SUnit *SUb = Exports[Idx + 1];
68 
69     // Copy all dependencies to the head of the chain to avoid any
70     // computation being inserted into the chain.
71     for (const SDep &Pred : SUb->Preds) {
72       SUnit *PredSU = Pred.getSUnit();
73       if (!isExport(*PredSU) && !Pred.isWeak())
74         DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
75     }
76 
77     // New barrier edge ordering exports
78     DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
79     // Also add cluster edge
80     DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
81   }
82 }
83 
84 void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
85   const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
86 
87   SmallVector<SUnit *, 8> Chain;
88 
89   // Pass through DAG gathering a list of exports and removing barrier edges
90   // creating dependencies on exports. Freeing exports of successor edges
91   // allows more scheduling freedom, and nothing should be order dependent
92   // on exports.  Edges will be added later to order the exports.
93   unsigned PosCount = 0;
94   for (SUnit &SU : DAG->SUnits) {
95     if (isExport(SU)) {
96       Chain.push_back(&SU);
97       if (isPositionExport(TII, &SU))
98         PosCount++;
99     }
100 
101     SmallVector<SDep, 2> ToRemove;
102     for (const SDep &Pred : SU.Preds) {
103       SUnit *PredSU = Pred.getSUnit();
104       if (Pred.isBarrier() && isExport(*PredSU))
105         ToRemove.push_back(Pred);
106     }
107     for (SDep Pred : ToRemove)
108       SU.removePred(Pred);
109   }
110 
111   // Apply clustering if there are multiple exports
112   if (Chain.size() > 1) {
113     sortChain(TII, Chain, PosCount);
114     buildCluster(Chain, DAG);
115   }
116 }
117 
118 } // end namespace
119 
120 namespace llvm {
121 
122 std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
123   return std::make_unique<ExportClustering>();
124 }
125 
126 } // end namespace llvm
127