1 //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// Insert s_clause instructions to form hard clauses. 11 /// 12 /// Clausing load instructions can give cache coherency benefits. Before gfx10, 13 /// the hardware automatically detected "soft clauses", which were sequences of 14 /// memory instructions of the same type. In gfx10 this detection was removed, 15 /// and the s_clause instruction was introduced to explicitly mark "hard 16 /// clauses". 17 /// 18 /// It's the scheduler's job to form the clauses by putting similar memory 19 /// instructions next to each other. Our job is just to insert an s_clause 20 /// instruction to mark the start of each clause. 21 /// 22 /// Note that hard clauses are very similar to, but logically distinct from, the 23 /// groups of instructions that have to be restartable when XNACK is enabled. 24 /// The rules are slightly different in each case. For example an s_nop 25 /// instruction breaks a restartable group, but can appear in the middle of a 26 /// hard clause. (Before gfx10 there wasn't a distinction, and both were called 27 /// "soft clauses" or just "clauses".) 28 /// 29 /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable 30 /// groups, not hard clauses. 31 // 32 //===----------------------------------------------------------------------===// 33 34 #include "AMDGPU.h" 35 #include "GCNSubtarget.h" 36 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 37 #include "llvm/ADT/SmallVector.h" 38 39 using namespace llvm; 40 41 #define DEBUG_TYPE "si-insert-hard-clauses" 42 43 namespace { 44 45 enum HardClauseType { 46 // Texture, buffer, global or scratch memory instructions. 47 HARDCLAUSE_VMEM, 48 // Flat (not global or scratch) memory instructions. 49 HARDCLAUSE_FLAT, 50 // Instructions that access LDS. 51 HARDCLAUSE_LDS, 52 // Scalar memory instructions. 53 HARDCLAUSE_SMEM, 54 // VALU instructions. 55 HARDCLAUSE_VALU, 56 LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU, 57 58 // Internal instructions, which are allowed in the middle of a hard clause, 59 // except for s_waitcnt. 60 HARDCLAUSE_INTERNAL, 61 // Instructions that are not allowed in a hard clause: SALU, export, branch, 62 // message, GDS, s_waitcnt and anything else not mentioned above. 63 HARDCLAUSE_ILLEGAL, 64 }; 65 66 class SIInsertHardClauses : public MachineFunctionPass { 67 public: 68 static char ID; 69 const GCNSubtarget *ST = nullptr; 70 71 SIInsertHardClauses() : MachineFunctionPass(ID) {} 72 73 void getAnalysisUsage(AnalysisUsage &AU) const override { 74 AU.setPreservesCFG(); 75 MachineFunctionPass::getAnalysisUsage(AU); 76 } 77 78 HardClauseType getHardClauseType(const MachineInstr &MI) { 79 // On current architectures we only get a benefit from clausing loads. 80 if (MI.mayLoad()) { 81 if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) 82 return HARDCLAUSE_VMEM; 83 if (SIInstrInfo::isFLAT(MI)) 84 return HARDCLAUSE_FLAT; 85 // TODO: LDS 86 if (SIInstrInfo::isSMRD(MI)) 87 return HARDCLAUSE_SMEM; 88 } 89 90 // Don't form VALU clauses. It's not clear what benefit they give, if any. 91 92 // In practice s_nop is the only internal instruction we're likely to see. 93 // It's safe to treat the rest as illegal. 94 if (MI.getOpcode() == AMDGPU::S_NOP) 95 return HARDCLAUSE_INTERNAL; 96 return HARDCLAUSE_ILLEGAL; 97 } 98 99 // Track information about a clause as we discover it. 100 struct ClauseInfo { 101 // The type of all (non-internal) instructions in the clause. 102 HardClauseType Type = HARDCLAUSE_ILLEGAL; 103 // The first (necessarily non-internal) instruction in the clause. 104 MachineInstr *First = nullptr; 105 // The last non-internal instruction in the clause. 106 MachineInstr *Last = nullptr; 107 // The length of the clause including any internal instructions in the 108 // middle or after the end of the clause. 109 unsigned Length = 0; 110 // The base operands of *Last. 111 SmallVector<const MachineOperand *, 4> BaseOps; 112 }; 113 114 bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { 115 // Get the size of the clause excluding any internal instructions at the 116 // end. 117 unsigned Size = 118 std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1; 119 if (Size < 2) 120 return false; 121 assert(Size <= 64 && "Hard clause is too long!"); 122 123 auto &MBB = *CI.First->getParent(); 124 auto ClauseMI = 125 BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) 126 .addImm(Size - 1); 127 finalizeBundle(MBB, ClauseMI->getIterator(), 128 std::next(CI.Last->getIterator())); 129 return true; 130 } 131 132 bool runOnMachineFunction(MachineFunction &MF) override { 133 if (skipFunction(MF.getFunction())) 134 return false; 135 136 ST = &MF.getSubtarget<GCNSubtarget>(); 137 if (!ST->hasHardClauses()) 138 return false; 139 140 const SIInstrInfo *SII = ST->getInstrInfo(); 141 const TargetRegisterInfo *TRI = ST->getRegisterInfo(); 142 143 bool Changed = false; 144 for (auto &MBB : MF) { 145 ClauseInfo CI; 146 for (auto &MI : MBB) { 147 HardClauseType Type = getHardClauseType(MI); 148 149 int64_t Dummy1; 150 bool Dummy2; 151 unsigned Dummy3; 152 SmallVector<const MachineOperand *, 4> BaseOps; 153 if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 154 if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, 155 Dummy3, TRI)) { 156 // We failed to get the base operands, so we'll never clause this 157 // instruction with any other, so pretend it's illegal. 158 Type = HARDCLAUSE_ILLEGAL; 159 } 160 } 161 162 if (CI.Length == 64 || 163 (CI.Length && Type != HARDCLAUSE_INTERNAL && 164 (Type != CI.Type || 165 // Note that we lie to shouldClusterMemOps about the size of the 166 // cluster. When shouldClusterMemOps is called from the machine 167 // scheduler it limits the size of the cluster to avoid increasing 168 // register pressure too much, but this pass runs after register 169 // allocation so there is no need for that kind of limit. 170 !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) { 171 // Finish the current clause. 172 Changed |= emitClause(CI, SII); 173 CI = ClauseInfo(); 174 } 175 176 if (CI.Length) { 177 // Extend the current clause. 178 ++CI.Length; 179 if (Type != HARDCLAUSE_INTERNAL) { 180 CI.Last = &MI; 181 CI.BaseOps = std::move(BaseOps); 182 } 183 } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 184 // Start a new clause. 185 CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)}; 186 } 187 } 188 189 // Finish the last clause in the basic block if any. 190 if (CI.Length) 191 Changed |= emitClause(CI, SII); 192 } 193 194 return Changed; 195 } 196 }; 197 198 } // namespace 199 200 char SIInsertHardClauses::ID = 0; 201 202 char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; 203 204 INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses", 205 false, false) 206