1 //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Insert s_clause instructions to form hard clauses. 11 /// 12 /// Clausing load instructions can give cache coherency benefits. Before gfx10, 13 /// the hardware automatically detected "soft clauses", which were sequences of 14 /// memory instructions of the same type. In gfx10 this detection was removed, 15 /// and the s_clause instruction was introduced to explicitly mark "hard 16 /// clauses". 17 /// 18 /// It's the scheduler's job to form the clauses by putting similar memory 19 /// instructions next to each other. Our job is just to insert an s_clause 20 /// instruction to mark the start of each clause. 21 /// 22 /// Note that hard clauses are very similar to, but logically distinct from, the 23 /// groups of instructions that have to be restartable when XNACK is enabled. 24 /// The rules are slightly different in each case. For example an s_nop 25 /// instruction breaks a restartable group, but can appear in the middle of a 26 /// hard clause. (Before gfx10 there wasn't a distinction, and both were called 27 /// "soft clauses" or just "clauses".) 28 /// 29 /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable 30 /// groups, not hard clauses. 31 // 32 //===----------------------------------------------------------------------===// 33 34 #include "AMDGPUSubtarget.h" 35 #include "SIInstrInfo.h" 36 #include "llvm/ADT/SmallVector.h" 37 38 using namespace llvm; 39 40 #define DEBUG_TYPE "si-insert-hard-clauses" 41 42 namespace { 43 44 enum HardClauseType { 45 // Texture, buffer, global or scratch memory instructions. 46 HARDCLAUSE_VMEM, 47 // Flat (not global or scratch) memory instructions. 48 HARDCLAUSE_FLAT, 49 // Instructions that access LDS. 50 HARDCLAUSE_LDS, 51 // Scalar memory instructions. 52 HARDCLAUSE_SMEM, 53 // VALU instructions. 54 HARDCLAUSE_VALU, 55 LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU, 56 57 // Internal instructions, which are allowed in the middle of a hard clause, 58 // except for s_waitcnt. 59 HARDCLAUSE_INTERNAL, 60 // Instructions that are not allowed in a hard clause: SALU, export, branch, 61 // message, GDS, s_waitcnt and anything else not mentioned above. 62 HARDCLAUSE_ILLEGAL, 63 }; 64 65 HardClauseType getHardClauseType(const MachineInstr &MI) { 66 // On current architectures we only get a benefit from clausing loads. 67 if (MI.mayLoad()) { 68 if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) 69 return HARDCLAUSE_VMEM; 70 if (SIInstrInfo::isFLAT(MI)) 71 return HARDCLAUSE_FLAT; 72 // TODO: LDS 73 if (SIInstrInfo::isSMRD(MI)) 74 return HARDCLAUSE_SMEM; 75 } 76 77 // Don't form VALU clauses. It's not clear what benefit they give, if any. 78 79 // In practice s_nop is the only internal instructions we're likely to see. 80 // It's safe to treat the rest as illegal. 81 if (MI.getOpcode() == AMDGPU::S_NOP) 82 return HARDCLAUSE_INTERNAL; 83 return HARDCLAUSE_ILLEGAL; 84 } 85 86 class SIInsertHardClauses : public MachineFunctionPass { 87 public: 88 static char ID; 89 90 SIInsertHardClauses() : MachineFunctionPass(ID) {} 91 92 void getAnalysisUsage(AnalysisUsage &AU) const override { 93 AU.setPreservesCFG(); 94 MachineFunctionPass::getAnalysisUsage(AU); 95 } 96 97 // Track information about a clause as we discover it. 98 struct ClauseInfo { 99 // The type of all (non-internal) instructions in the clause. 100 HardClauseType Type = HARDCLAUSE_ILLEGAL; 101 // The first (necessarily non-internal) instruction in the clause. 102 MachineInstr *First = nullptr; 103 // The last non-internal instruction in the clause. 104 MachineInstr *Last = nullptr; 105 // The length of the clause including any internal instructions in the 106 // middle. 107 unsigned Length = 0; 108 // The base operands of *Last. 109 SmallVector<const MachineOperand *, 4> BaseOps; 110 }; 111 112 bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { 113 assert(CI.Length == 114 std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1); 115 if (CI.Length < 2) 116 return false; 117 assert(CI.Length <= 64 && "Hard clause is too long!"); 118 119 auto &MBB = *CI.First->getParent(); 120 auto ClauseMI = 121 BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) 122 .addImm(CI.Length - 1); 123 finalizeBundle(MBB, ClauseMI->getIterator(), 124 std::next(CI.Last->getIterator())); 125 return true; 126 } 127 128 bool runOnMachineFunction(MachineFunction &MF) override { 129 if (skipFunction(MF.getFunction())) 130 return false; 131 132 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 133 if (!ST.hasHardClauses()) 134 return false; 135 136 const SIInstrInfo *SII = ST.getInstrInfo(); 137 const TargetRegisterInfo *TRI = ST.getRegisterInfo(); 138 139 bool Changed = false; 140 for (auto &MBB : MF) { 141 ClauseInfo CI; 142 for (auto &MI : MBB) { 143 HardClauseType Type = getHardClauseType(MI); 144 145 int64_t Dummy1; 146 bool Dummy2; 147 SmallVector<const MachineOperand *, 4> BaseOps; 148 if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 149 if (!SII->getMemOperandsWithOffset(MI, BaseOps, Dummy1, Dummy2, 150 TRI)) { 151 // We failed to get the base operands, so we'll never clause this 152 // instruction with any other, so pretend it's illegal. 153 Type = HARDCLAUSE_ILLEGAL; 154 } 155 } 156 157 if (CI.Length == 64 || 158 (CI.Length && Type != HARDCLAUSE_INTERNAL && 159 (Type != CI.Type || 160 // Note that we lie to shouldClusterMemOps about the size of the 161 // cluster. When shouldClusterMemOps is called from the machine 162 // scheduler it limits the size of the cluster to avoid increasing 163 // register pressure too much, but this pass runs after register 164 // allocation so there is no need for that kind of limit. 165 !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2)))) { 166 // Finish the current clause. 167 Changed |= emitClause(CI, SII); 168 CI = ClauseInfo(); 169 } 170 171 if (CI.Length) { 172 // Extend the current clause. 173 ++CI.Length; 174 if (Type != HARDCLAUSE_INTERNAL) { 175 CI.Last = &MI; 176 CI.BaseOps = std::move(BaseOps); 177 } 178 } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 179 // Start a new clause. 180 CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)}; 181 } 182 } 183 184 // Finish the last clause in the basic block if any. 185 if (CI.Length) 186 Changed |= emitClause(CI, SII); 187 } 188 189 return Changed; 190 } 191 }; 192 193 } // namespace 194 195 char SIInsertHardClauses::ID = 0; 196 197 char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; 198 199 INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses", 200 false, false) 201