//===--- AMDGPUExportClustering.cpp - AMDGPU Export Clustering -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This file contains a DAG scheduling mutation to cluster shader
/// exports.
//
//===----------------------------------------------------------------------===//

#include "AMDGPUExportClustering.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"

using namespace llvm;

namespace {

class ExportClustering : public ScheduleDAGMutation {
public:
  ExportClustering() {}
  void apply(ScheduleDAGInstrs *DAG) override;
};

static bool isExport(const SUnit &SU) {
  const MachineInstr *MI = SU.getInstr();
  return MI->getOpcode() == AMDGPU::EXP ||
         MI->getOpcode() == AMDGPU::EXP_DONE;
}

static bool isPositionExport(const SIInstrInfo *TII, SUnit *SU) {
  const MachineInstr *MI = SU->getInstr();
  // Export targets 12 through 15 are the position exports (pos0..pos3).
  int Imm = TII->getNamedOperand(*MI, AMDGPU::OpName::tgt)->getImm();
  return Imm >= 12 && Imm <= 15;
}

static void sortChain(const SIInstrInfo *TII, SmallVector<SUnit *, 8> &Chain,
                      unsigned PosCount) {
  if (!PosCount || PosCount == Chain.size())
    return;

  // Position exports should occur as soon as possible in the shader
  // for optimal performance. This moves position exports before
  // other exports while preserving the order within different export
  // types (pos or other).
  SmallVector<SUnit *, 8> Copy(Chain);
  unsigned PosIdx = 0;
  unsigned OtherIdx = PosCount;
  for (SUnit *SU : Copy) {
    if (isPositionExport(TII, SU))
      Chain[PosIdx++] = SU;
    else
      Chain[OtherIdx++] = SU;
  }
}

static void buildCluster(ArrayRef<SUnit *> Exports, ScheduleDAGInstrs *DAG) {
  SUnit *ChainHead = Exports.front();

  // Now construct cluster from chain by adding new edges.
  for (unsigned Idx = 0, End = Exports.size() - 1; Idx < End; ++Idx) {
    SUnit *SUa = Exports[Idx];
    SUnit *SUb = Exports[Idx + 1];

    // Copy all dependencies to the head of the chain to avoid any
    // computation being inserted into the chain.
    for (const SDep &Pred : SUb->Preds) {
      SUnit *PredSU = Pred.getSUnit();
      if (!isExport(*PredSU) && !Pred.isWeak())
        DAG->addEdge(ChainHead, SDep(PredSU, SDep::Artificial));
    }

    // New barrier edge ordering exports
    DAG->addEdge(SUb, SDep(SUa, SDep::Barrier));
    // Also add cluster edge
    DAG->addEdge(SUb, SDep(SUa, SDep::Cluster));
  }
}

void ExportClustering::apply(ScheduleDAGInstrs *DAG) {
  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(DAG->TII);

  SmallVector<SUnit *, 8> Chain;

  // Pass through DAG gathering a list of exports and removing barrier edges
  // creating dependencies on exports. Freeing exports of successor edges
  // allows more scheduling freedom, and nothing should be order dependent
  // on exports. Edges will be added later to order the exports.
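  // Position exports are counted while gathering so that sortChain can
  // later partition them ahead of the other exports in the chain.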
  unsigned PosCount = 0;
  for (SUnit &SU : DAG->SUnits) {
    if (isExport(SU)) {
      Chain.push_back(&SU);
      if (isPositionExport(TII, &SU))
        PosCount++;
    }

    SmallVector<SDep, 2> ToRemove;
    for (const SDep &Pred : SU.Preds) {
      SUnit *PredSU = Pred.getSUnit();
      if (Pred.isBarrier() && isExport(*PredSU))
        ToRemove.push_back(Pred);
    }
    for (SDep Pred : ToRemove)
      SU.removePred(Pred);
  }

  // Apply clustering if there are multiple exports
  if (Chain.size() > 1) {
    sortChain(TII, Chain, PosCount);
    buildCluster(Chain, DAG);
  }
}

} // end namespace

namespace llvm {

std::unique_ptr<ScheduleDAGMutation>
createAMDGPUExportClusteringDAGMutation() {
  return std::make_unique<ExportClustering>();
}

} // end namespace llvm
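
// Usage sketch (illustrative): the mutation is typically attached to a
// scheduling DAG when the target constructs its machine scheduler, e.g.
//
//   DAG->addMutation(createAMDGPUExportClusteringDAGMutation());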