//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering ------------===//
2e3ffe726SCarl Ritson //
3e3ffe726SCarl Ritson // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e3ffe726SCarl Ritson // See https://llvm.org/LICENSE.txt for license information.
5e3ffe726SCarl Ritson // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e3ffe726SCarl Ritson //
7e3ffe726SCarl Ritson //===----------------------------------------------------------------------===//
8e3ffe726SCarl Ritson //
9e3ffe726SCarl Ritson /// \file This file contains a DAG scheduling mutation to cluster shader
10e3ffe726SCarl Ritson ///       exports.
11e3ffe726SCarl Ritson //
12e3ffe726SCarl Ritson //===----------------------------------------------------------------------===//
13e3ffe726SCarl Ritson 
14e3ffe726SCarl Ritson #include "AMDGPUExportClustering.h"
15e3ffe726SCarl Ritson #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
16e3ffe726SCarl Ritson #include "SIInstrInfo.h"
176a87e9b0Sdfukalov #include "llvm/CodeGen/ScheduleDAGInstrs.h"
18e3ffe726SCarl Ritson 
19e3ffe726SCarl Ritson using namespace llvm;
20e3ffe726SCarl Ritson 
21e3ffe726SCarl Ritson namespace {
22e3ffe726SCarl Ritson 
23e3ffe726SCarl Ritson class ExportClustering : public ScheduleDAGMutation {
24e3ffe726SCarl Ritson public:
25*3a3cb929SKazu Hirata   ExportClustering() = default;
26e3ffe726SCarl Ritson   void apply(ScheduleDAGInstrs *DAG) override;
27e3ffe726SCarl Ritson };
28e3ffe726SCarl Ritson 
isExport(const SUnit & SU)29e3ffe726SCarl Ritson static bool isExport(const SUnit &SU) {
30f94fd1c8SJay Foad   return SIInstrInfo::isEXP(*SU.getInstr());
31e3ffe726SCarl Ritson }
32e3ffe726SCarl Ritson 
isPositionExport(const SIInstrInfo * TII,SUnit * SU)3358f1417eSCarl Ritson static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
3458f1417eSCarl Ritson   const MachineInstr *MI = SU->getInstr();
35745064e3SDmitry Preobrazhensky   unsigned Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
366881a82eSJay Foad   return Imm >= AMDGPU::Exp::ET_POS0 && Imm <= AMDGPU::Exp::ET_POS_LAST;
3758f1417eSCarl Ritson }
3858f1417eSCarl Ritson 
sortChain(const SIInstrInfo * TII,SmallVector<SUnit *,8> & Chain,unsigned PosCount)3958f1417eSCarl Ritson static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
4058f1417eSCarl Ritson                       unsigned PosCount) {
4158f1417eSCarl Ritson   if (!PosCount || PosCount == Chain.size())
4258f1417eSCarl Ritson     return;
4358f1417eSCarl Ritson 
4458f1417eSCarl Ritson   // Position exports should occur as soon as possible in the shader
4558f1417eSCarl Ritson   // for optimal performance.  This moves position exports before
4658f1417eSCarl Ritson   // other exports while preserving the order within different export
4758f1417eSCarl Ritson   // types (pos or other).
4858f1417eSCarl Ritson   SmallVector<SUnit *, 8> Copy(Chain);
4958f1417eSCarl Ritson   unsigned PosIdx = 0;
5058f1417eSCarl Ritson   unsigned OtherIdx = PosCount;
5158f1417eSCarl Ritson   for (SUnit *SU : Copy) {
5258f1417eSCarl Ritson     if (isPositionExport(TII, SU))
5358f1417eSCarl Ritson       Chain[PosIdx++] = SU;
5458f1417eSCarl Ritson     else
5558f1417eSCarl Ritson       Chain[OtherIdx++] = SU;
5658f1417eSCarl Ritson   }
5758f1417eSCarl Ritson }
5858f1417eSCarl Ritson 
buildCluster(ArrayRef<SUnit * > Exports,ScheduleDAGInstrs * DAG)59e3ffe726SCarl Ritson static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
6058f1417eSCarl Ritson   SUnit *ChainHead = Exports.front();
6158f1417eSCarl Ritson 
6258f1417eSCarl Ritson   // Now construct cluster from chain by adding new edges.
63e3ffe726SCarl Ritson   for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
64e3ffe726SCarl Ritson     SUnit *SUa = Exports[Idx];
65e3ffe726SCarl Ritson     SUnit *SUb = Exports[Idx + 1];
6658f1417eSCarl Ritson 
6758f1417eSCarl Ritson     // Copy all dependencies to the head of the chain to avoid any
6858f1417eSCarl Ritson     // computation being inserted into the chain.
69e3ffe726SCarl Ritson     for (const SDep &Pred : SUb->Preds) {
70e3ffe726SCarl Ritson       SUnit *PredSU = Pred.getSUnit();
7158f1417eSCarl Ritson       if (!isExport(*PredSU) && !Pred.isWeak())
72e3ffe726SCarl Ritson         DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
73e3ffe726SCarl Ritson     }
7458f1417eSCarl Ritson 
7558f1417eSCarl Ritson     // New barrier edge ordering exports
7658f1417eSCarl Ritson     DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
7758f1417eSCarl Ritson     // Also add cluster edge
7858f1417eSCarl Ritson     DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
79e3ffe726SCarl Ritson   }
80e3ffe726SCarl Ritson }
81e3ffe726SCarl Ritson 
removeExportDependencies(ScheduleDAGInstrs * DAG,SUnit & SU)82195de442SCarl Ritson static void removeExportDependencies(ScheduleDAGInstrs *DAG, SUnit &SU) {
83195de442SCarl Ritson   SmallVector<SDep, 2> ToAdd, ToRemove;
84195de442SCarl Ritson 
85195de442SCarl Ritson   for (const SDep &Pred : SU.Preds) {
86195de442SCarl Ritson     SUnit *PredSU = Pred.getSUnit();
87195de442SCarl Ritson     if (Pred.isBarrier() && isExport(*PredSU)) {
88195de442SCarl Ritson       ToRemove.push_back(Pred);
89195de442SCarl Ritson       if (isExport(SU))
90195de442SCarl Ritson         continue;
91195de442SCarl Ritson 
92195de442SCarl Ritson       // If we remove a barrier we need to copy dependencies
93195de442SCarl Ritson       // from the predecessor to maintain order.
94195de442SCarl Ritson       for (const SDep &ExportPred : PredSU->Preds) {
95195de442SCarl Ritson         SUnit *ExportPredSU = ExportPred.getSUnit();
96195de442SCarl Ritson         if (ExportPred.isBarrier() && !isExport(*ExportPredSU))
97195de442SCarl Ritson           ToAdd.push_back(SDep(ExportPredSU, SDep::Barrier));
98195de442SCarl Ritson       }
99195de442SCarl Ritson     }
100195de442SCarl Ritson   }
101195de442SCarl Ritson 
102195de442SCarl Ritson   for (SDep Pred : ToRemove)
103195de442SCarl Ritson     SU.removePred(Pred);
104195de442SCarl Ritson   for (SDep Pred : ToAdd)
105195de442SCarl Ritson     DAG->addEdge(&SU, Pred);
106195de442SCarl Ritson }
107195de442SCarl Ritson 
apply(ScheduleDAGInstrs * DAG)108e3ffe726SCarl Ritson void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
10958f1417eSCarl Ritson   const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);
110e3ffe726SCarl Ritson 
11158f1417eSCarl Ritson   SmallVector<SUnit *, 8> Chain;
11258f1417eSCarl Ritson 
11358f1417eSCarl Ritson   // Pass through DAG gathering a list of exports and removing barrier edges
11458f1417eSCarl Ritson   // creating dependencies on exports. Freeing exports of successor edges
11558f1417eSCarl Ritson   // allows more scheduling freedom, and nothing should be order dependent
11658f1417eSCarl Ritson   // on exports.  Edges will be added later to order the exports.
11758f1417eSCarl Ritson   unsigned PosCount = 0;
118e3ffe726SCarl Ritson   for (SUnit &SU : DAG->SUnits) {
119195de442SCarl Ritson     if (!isExport(SU))
120195de442SCarl Ritson       continue;
121195de442SCarl Ritson 
122e3ffe726SCarl Ritson     Chain.push_back(&SU);
12358f1417eSCarl Ritson     if (isPositionExport(TII, &SU))
12458f1417eSCarl Ritson       PosCount++;
125e3ffe726SCarl Ritson 
126195de442SCarl Ritson     removeExportDependencies(DAG, SU);
127195de442SCarl Ritson 
128195de442SCarl Ritson     SmallVector<SDep, 4> Succs(SU.Succs);
129195de442SCarl Ritson     for (SDep Succ : Succs)
130195de442SCarl Ritson       removeExportDependencies(DAG, *Succ.getSUnit());
13158f1417eSCarl Ritson   }
13258f1417eSCarl Ritson 
13358f1417eSCarl Ritson   // Apply clustering if there are multiple exports
13458f1417eSCarl Ritson   if (Chain.size() > 1) {
13558f1417eSCarl Ritson     sortChain(TII, Chain, PosCount);
136e3ffe726SCarl Ritson     buildCluster(Chain, DAG);
137e3ffe726SCarl Ritson   }
13858f1417eSCarl Ritson }
139e3ffe726SCarl Ritson 
140e3ffe726SCarl Ritson } // end namespace
141e3ffe726SCarl Ritson 
142e3ffe726SCarl Ritson namespace llvm {
143e3ffe726SCarl Ritson 
createAMDGPUExportClusteringDAGMutation()144e3ffe726SCarl Ritson std::unique_ptr<ScheduleDAGMutation> createAMDGPUExportClusteringDAGMutation() {
145e3ffe726SCarl Ritson   return std::make_unique<ExportClustering>();
146e3ffe726SCarl Ritson }
147e3ffe726SCarl Ritson 
148e3ffe726SCarl Ritson } // end namespace llvm
149