//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Insert s_clause instructions to form hard clauses.
///
/// Clausing load instructions can give cache coherency benefits. Before gfx10,
/// the hardware automatically detected "soft clauses", which were sequences of
/// memory instructions of the same type. In gfx10 this detection was removed,
/// and the s_clause instruction was introduced to explicitly mark "hard
/// clauses".
///
/// It's the scheduler's job to form the clauses by putting similar memory
/// instructions next to each other. Our job is just to insert an s_clause
/// instruction to mark the start of each clause.
///
/// Note that hard clauses are very similar to, but logically distinct from, the
/// groups of instructions that have to be restartable when XNACK is enabled.
/// The rules are slightly different in each case. For example an s_nop
/// instruction breaks a restartable group, but can appear in the middle of a
/// hard clause. (Before gfx10 there wasn't a distinction, and both were called
/// "soft clauses" or just "clauses".)
///
/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
/// groups, not hard clauses.
31 // 32 //===----------------------------------------------------------------------===// 33 34 #include "AMDGPU.h" 35 #include "GCNSubtarget.h" 36 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 37 #include "llvm/ADT/SmallVector.h" 38 #include "llvm/CodeGen/MachineFunctionPass.h" 39 40 using namespace llvm; 41 42 #define DEBUG_TYPE "si-insert-hard-clauses" 43 44 namespace { 45 46 enum HardClauseType { 47 // Texture, buffer, global or scratch memory instructions. 48 HARDCLAUSE_VMEM, 49 // Flat (not global or scratch) memory instructions. 50 HARDCLAUSE_FLAT, 51 // Instructions that access LDS. 52 HARDCLAUSE_LDS, 53 // Scalar memory instructions. 54 HARDCLAUSE_SMEM, 55 // VALU instructions. 56 HARDCLAUSE_VALU, 57 LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU, 58 59 // Internal instructions, which are allowed in the middle of a hard clause, 60 // except for s_waitcnt. 61 HARDCLAUSE_INTERNAL, 62 // Meta instructions that do not result in any ISA like KILL. 63 HARDCLAUSE_IGNORE, 64 // Instructions that are not allowed in a hard clause: SALU, export, branch, 65 // message, GDS, s_waitcnt and anything else not mentioned above. 66 HARDCLAUSE_ILLEGAL, 67 }; 68 69 class SIInsertHardClauses : public MachineFunctionPass { 70 public: 71 static char ID; 72 const GCNSubtarget *ST = nullptr; 73 74 SIInsertHardClauses() : MachineFunctionPass(ID) {} 75 76 void getAnalysisUsage(AnalysisUsage &AU) const override { 77 AU.setPreservesCFG(); 78 MachineFunctionPass::getAnalysisUsage(AU); 79 } 80 81 HardClauseType getHardClauseType(const MachineInstr &MI) { 82 83 // On current architectures we only get a benefit from clausing loads. 
84 if (MI.mayLoad()) { 85 if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { 86 if (ST->hasNSAClauseBug()) { 87 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 88 if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA) 89 return HARDCLAUSE_ILLEGAL; 90 } 91 return HARDCLAUSE_VMEM; 92 } 93 if (SIInstrInfo::isFLAT(MI)) 94 return HARDCLAUSE_FLAT; 95 // TODO: LDS 96 if (SIInstrInfo::isSMRD(MI)) 97 return HARDCLAUSE_SMEM; 98 } 99 100 // Don't form VALU clauses. It's not clear what benefit they give, if any. 101 102 // In practice s_nop is the only internal instruction we're likely to see. 103 // It's safe to treat the rest as illegal. 104 if (MI.getOpcode() == AMDGPU::S_NOP) 105 return HARDCLAUSE_INTERNAL; 106 if (MI.isMetaInstruction()) 107 return HARDCLAUSE_IGNORE; 108 return HARDCLAUSE_ILLEGAL; 109 } 110 111 // Track information about a clause as we discover it. 112 struct ClauseInfo { 113 // The type of all (non-internal) instructions in the clause. 114 HardClauseType Type = HARDCLAUSE_ILLEGAL; 115 // The first (necessarily non-internal) instruction in the clause. 116 MachineInstr *First = nullptr; 117 // The last non-internal instruction in the clause. 118 MachineInstr *Last = nullptr; 119 // The length of the clause including any internal instructions in the 120 // middle (but not at the end) of the clause. 121 unsigned Length = 0; 122 // Internal instructions at the and of a clause should not be included in 123 // the clause. Count them in TrailingInternalLength until a new memory 124 // instruction is added. 125 unsigned TrailingInternalLength = 0; 126 // The base operands of *Last. 
127 SmallVector<const MachineOperand *, 4> BaseOps; 128 }; 129 130 bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { 131 if (CI.First == CI.Last) 132 return false; 133 assert(CI.Length <= 64 && "Hard clause is too long!"); 134 135 auto &MBB = *CI.First->getParent(); 136 auto ClauseMI = 137 BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) 138 .addImm(CI.Length - 1); 139 finalizeBundle(MBB, ClauseMI->getIterator(), 140 std::next(CI.Last->getIterator())); 141 return true; 142 } 143 144 bool runOnMachineFunction(MachineFunction &MF) override { 145 if (skipFunction(MF.getFunction())) 146 return false; 147 148 ST = &MF.getSubtarget<GCNSubtarget>(); 149 if (!ST->hasHardClauses()) 150 return false; 151 152 const SIInstrInfo *SII = ST->getInstrInfo(); 153 const TargetRegisterInfo *TRI = ST->getRegisterInfo(); 154 155 bool Changed = false; 156 for (auto &MBB : MF) { 157 ClauseInfo CI; 158 for (auto &MI : MBB) { 159 HardClauseType Type = getHardClauseType(MI); 160 161 int64_t Dummy1; 162 bool Dummy2; 163 unsigned Dummy3; 164 SmallVector<const MachineOperand *, 4> BaseOps; 165 if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 166 if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, 167 Dummy3, TRI)) { 168 // We failed to get the base operands, so we'll never clause this 169 // instruction with any other, so pretend it's illegal. 170 Type = HARDCLAUSE_ILLEGAL; 171 } 172 } 173 174 if (CI.Length == 64 || 175 (CI.Length && Type != HARDCLAUSE_INTERNAL && 176 Type != HARDCLAUSE_IGNORE && 177 (Type != CI.Type || 178 // Note that we lie to shouldClusterMemOps about the size of the 179 // cluster. When shouldClusterMemOps is called from the machine 180 // scheduler it limits the size of the cluster to avoid increasing 181 // register pressure too much, but this pass runs after register 182 // allocation so there is no need for that kind of limit. 183 !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) { 184 // Finish the current clause. 
185 Changed |= emitClause(CI, SII); 186 CI = ClauseInfo(); 187 } 188 189 if (CI.Length) { 190 // Extend the current clause. 191 if (Type != HARDCLAUSE_IGNORE) { 192 if (Type == HARDCLAUSE_INTERNAL) { 193 ++CI.TrailingInternalLength; 194 } else { 195 ++CI.Length; 196 CI.Length += CI.TrailingInternalLength; 197 CI.TrailingInternalLength = 0; 198 CI.Last = &MI; 199 CI.BaseOps = std::move(BaseOps); 200 } 201 } 202 } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 203 // Start a new clause. 204 CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)}; 205 } 206 } 207 208 // Finish the last clause in the basic block if any. 209 if (CI.Length) 210 Changed |= emitClause(CI, SII); 211 } 212 213 return Changed; 214 } 215 }; 216 217 } // namespace 218 219 char SIInsertHardClauses::ID = 0; 220 221 char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; 222 223 INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses", 224 false, false) 225