142a55605SJay Foad //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===// 242a55605SJay Foad // 342a55605SJay Foad // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 442a55605SJay Foad // See https://llvm.org/LICENSE.txt for license information. 542a55605SJay Foad // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 642a55605SJay Foad // 742a55605SJay Foad //===----------------------------------------------------------------------===// 842a55605SJay Foad // 942a55605SJay Foad /// \file 1042a55605SJay Foad /// Insert s_clause instructions to form hard clauses. 1142a55605SJay Foad /// 1242a55605SJay Foad /// Clausing load instructions can give cache coherency benefits. Before gfx10, 1342a55605SJay Foad /// the hardware automatically detected "soft clauses", which were sequences of 1442a55605SJay Foad /// memory instructions of the same type. In gfx10 this detection was removed, 1542a55605SJay Foad /// and the s_clause instruction was introduced to explicitly mark "hard 1642a55605SJay Foad /// clauses". 1742a55605SJay Foad /// 1842a55605SJay Foad /// It's the scheduler's job to form the clauses by putting similar memory 1942a55605SJay Foad /// instructions next to each other. Our job is just to insert an s_clause 2042a55605SJay Foad /// instruction to mark the start of each clause. 2142a55605SJay Foad /// 2242a55605SJay Foad /// Note that hard clauses are very similar to, but logically distinct from, the 2342a55605SJay Foad /// groups of instructions that have to be restartable when XNACK is enabled. 2442a55605SJay Foad /// The rules are slightly different in each case. For example an s_nop 2542a55605SJay Foad /// instruction breaks a restartable group, but can appear in the middle of a 2642a55605SJay Foad /// hard clause. (Before gfx10 there wasn't a distinction, and both were called 2742a55605SJay Foad /// "soft clauses" or just "clauses".) 2842a55605SJay Foad /// 2942a55605SJay Foad /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable 3042a55605SJay Foad /// groups, not hard clauses. 3142a55605SJay Foad // 3242a55605SJay Foad //===----------------------------------------------------------------------===// 3342a55605SJay Foad 346a87e9b0Sdfukalov #include "AMDGPU.h" 35560d7e04Sdfukalov #include "GCNSubtarget.h" 36560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 3742a55605SJay Foad #include "llvm/ADT/SmallVector.h" 38989f1c72Sserge-sans-paille #include "llvm/CodeGen/MachineFunctionPass.h" 3942a55605SJay Foad 4042a55605SJay Foad using namespace llvm; 4142a55605SJay Foad 4242a55605SJay Foad #define DEBUG_TYPE "si-insert-hard-clauses" 4342a55605SJay Foad 4442a55605SJay Foad namespace { 4542a55605SJay Foad 46*ffe86e3bSJay Foad // A clause length of 64 instructions could be encoded in the s_clause 47*ffe86e3bSJay Foad // instruction, but the hardware documentation (at least for GFX11) says that 48*ffe86e3bSJay Foad // 63 is the maximum allowed. 49*ffe86e3bSJay Foad constexpr unsigned MaxInstructionsInClause = 63; 50*ffe86e3bSJay Foad 5142a55605SJay Foad enum HardClauseType { 52*ffe86e3bSJay Foad // For GFX10: 53*ffe86e3bSJay Foad 5442a55605SJay Foad // Texture, buffer, global or scratch memory instructions. 5542a55605SJay Foad HARDCLAUSE_VMEM, 5642a55605SJay Foad // Flat (not global or scratch) memory instructions. 5742a55605SJay Foad HARDCLAUSE_FLAT, 58*ffe86e3bSJay Foad 59*ffe86e3bSJay Foad // For GFX11: 60*ffe86e3bSJay Foad 61*ffe86e3bSJay Foad // Texture memory instructions. 62*ffe86e3bSJay Foad HARDCLAUSE_MIMG_LOAD, 63*ffe86e3bSJay Foad HARDCLAUSE_MIMG_STORE, 64*ffe86e3bSJay Foad HARDCLAUSE_MIMG_ATOMIC, 65*ffe86e3bSJay Foad HARDCLAUSE_MIMG_SAMPLE, 66*ffe86e3bSJay Foad // Buffer, global or scratch memory instructions. 67*ffe86e3bSJay Foad HARDCLAUSE_VMEM_LOAD, 68*ffe86e3bSJay Foad HARDCLAUSE_VMEM_STORE, 69*ffe86e3bSJay Foad HARDCLAUSE_VMEM_ATOMIC, 70*ffe86e3bSJay Foad // Flat (not global or scratch) memory instructions. 71*ffe86e3bSJay Foad HARDCLAUSE_FLAT_LOAD, 72*ffe86e3bSJay Foad HARDCLAUSE_FLAT_STORE, 73*ffe86e3bSJay Foad HARDCLAUSE_FLAT_ATOMIC, 74*ffe86e3bSJay Foad // BVH instructions. 75*ffe86e3bSJay Foad HARDCLAUSE_BVH, 76*ffe86e3bSJay Foad 77*ffe86e3bSJay Foad // Common: 78*ffe86e3bSJay Foad 7942a55605SJay Foad // Instructions that access LDS. 8042a55605SJay Foad HARDCLAUSE_LDS, 8142a55605SJay Foad // Scalar memory instructions. 8242a55605SJay Foad HARDCLAUSE_SMEM, 8342a55605SJay Foad // VALU instructions. 8442a55605SJay Foad HARDCLAUSE_VALU, 8542a55605SJay Foad LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU, 8642a55605SJay Foad 8742a55605SJay Foad // Internal instructions, which are allowed in the middle of a hard clause, 8842a55605SJay Foad // except for s_waitcnt. 8942a55605SJay Foad HARDCLAUSE_INTERNAL, 90bf980930SSebastian Neubauer // Meta instructions that do not result in any ISA like KILL. 91bf980930SSebastian Neubauer HARDCLAUSE_IGNORE, 9242a55605SJay Foad // Instructions that are not allowed in a hard clause: SALU, export, branch, 9342a55605SJay Foad // message, GDS, s_waitcnt and anything else not mentioned above. 9442a55605SJay Foad HARDCLAUSE_ILLEGAL, 9542a55605SJay Foad }; 9642a55605SJay Foad 979e026273SJay Foad class SIInsertHardClauses : public MachineFunctionPass { 989e026273SJay Foad public: 999e026273SJay Foad static char ID; 1009e026273SJay Foad const GCNSubtarget *ST = nullptr; 1019e026273SJay Foad SIInsertHardClauses()1029e026273SJay Foad SIInsertHardClauses() : MachineFunctionPass(ID) {} 1039e026273SJay Foad getAnalysisUsage(AnalysisUsage & AU) const1049e026273SJay Foad void getAnalysisUsage(AnalysisUsage &AU) const override { 1059e026273SJay Foad AU.setPreservesCFG(); 1069e026273SJay Foad MachineFunctionPass::getAnalysisUsage(AU); 1079e026273SJay Foad } 1089e026273SJay Foad getHardClauseType(const MachineInstr & MI)10942a55605SJay Foad HardClauseType getHardClauseType(const MachineInstr &MI) { 110*ffe86e3bSJay Foad if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) { 111*ffe86e3bSJay Foad if (ST->getGeneration() == AMDGPUSubtarget::GFX10) { 1129cf6ff7aSCarl Ritson if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { 1139cf6ff7aSCarl Ritson if (ST->hasNSAClauseBug()) { 1149cf6ff7aSCarl Ritson const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 1159cf6ff7aSCarl Ritson if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA) 1169cf6ff7aSCarl Ritson return HARDCLAUSE_ILLEGAL; 1179cf6ff7aSCarl Ritson } 11842a55605SJay Foad return HARDCLAUSE_VMEM; 1199cf6ff7aSCarl Ritson } 12042a55605SJay Foad if (SIInstrInfo::isFLAT(MI)) 12142a55605SJay Foad return HARDCLAUSE_FLAT; 122*ffe86e3bSJay Foad } else { 123*ffe86e3bSJay Foad assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11); 124*ffe86e3bSJay Foad if (SIInstrInfo::isMIMG(MI)) { 125*ffe86e3bSJay Foad const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode()); 126*ffe86e3bSJay Foad const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo = 127*ffe86e3bSJay Foad AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode); 128*ffe86e3bSJay Foad if (BaseInfo->BVH) 129*ffe86e3bSJay Foad return HARDCLAUSE_BVH; 130*ffe86e3bSJay Foad if (BaseInfo->Sampler) 131*ffe86e3bSJay Foad return HARDCLAUSE_MIMG_SAMPLE; 132*ffe86e3bSJay Foad return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC 133*ffe86e3bSJay Foad : HARDCLAUSE_MIMG_LOAD 134*ffe86e3bSJay Foad : HARDCLAUSE_MIMG_STORE; 135*ffe86e3bSJay Foad } 136*ffe86e3bSJay Foad if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) { 137*ffe86e3bSJay Foad return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC 138*ffe86e3bSJay Foad : HARDCLAUSE_VMEM_LOAD 139*ffe86e3bSJay Foad : HARDCLAUSE_VMEM_STORE; 140*ffe86e3bSJay Foad } 141*ffe86e3bSJay Foad if (SIInstrInfo::isFLAT(MI)) { 142*ffe86e3bSJay Foad return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC 143*ffe86e3bSJay Foad : HARDCLAUSE_FLAT_LOAD 144*ffe86e3bSJay Foad : HARDCLAUSE_FLAT_STORE; 145*ffe86e3bSJay Foad } 146*ffe86e3bSJay Foad } 14742a55605SJay Foad // TODO: LDS 14842a55605SJay Foad if (SIInstrInfo::isSMRD(MI)) 14942a55605SJay Foad return HARDCLAUSE_SMEM; 15042a55605SJay Foad } 15142a55605SJay Foad 15242a55605SJay Foad // Don't form VALU clauses. It's not clear what benefit they give, if any. 15342a55605SJay Foad 15410c10f24SJay Foad // In practice s_nop is the only internal instruction we're likely to see. 15542a55605SJay Foad // It's safe to treat the rest as illegal. 15642a55605SJay Foad if (MI.getOpcode() == AMDGPU::S_NOP) 15742a55605SJay Foad return HARDCLAUSE_INTERNAL; 158bf980930SSebastian Neubauer if (MI.isMetaInstruction()) 159bf980930SSebastian Neubauer return HARDCLAUSE_IGNORE; 16042a55605SJay Foad return HARDCLAUSE_ILLEGAL; 16142a55605SJay Foad } 16242a55605SJay Foad 16342a55605SJay Foad // Track information about a clause as we discover it. 16442a55605SJay Foad struct ClauseInfo { 16542a55605SJay Foad // The type of all (non-internal) instructions in the clause. 16642a55605SJay Foad HardClauseType Type = HARDCLAUSE_ILLEGAL; 16742a55605SJay Foad // The first (necessarily non-internal) instruction in the clause. 16842a55605SJay Foad MachineInstr *First = nullptr; 16942a55605SJay Foad // The last non-internal instruction in the clause. 17042a55605SJay Foad MachineInstr *Last = nullptr; 17142a55605SJay Foad // The length of the clause including any internal instructions in the 172bf980930SSebastian Neubauer // middle (but not at the end) of the clause. 17342a55605SJay Foad unsigned Length = 0; 174bf980930SSebastian Neubauer // Internal instructions at the and of a clause should not be included in 175bf980930SSebastian Neubauer // the clause. Count them in TrailingInternalLength until a new memory 176bf980930SSebastian Neubauer // instruction is added. 177bf980930SSebastian Neubauer unsigned TrailingInternalLength = 0; 17842a55605SJay Foad // The base operands of *Last. 17942a55605SJay Foad SmallVector<const MachineOperand *, 4> BaseOps; 18042a55605SJay Foad }; 18142a55605SJay Foad emitClause(const ClauseInfo & CI,const SIInstrInfo * SII)18242a55605SJay Foad bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) { 183bf980930SSebastian Neubauer if (CI.First == CI.Last) 18442a55605SJay Foad return false; 185*ffe86e3bSJay Foad assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!"); 18642a55605SJay Foad 18742a55605SJay Foad auto &MBB = *CI.First->getParent(); 18842a55605SJay Foad auto ClauseMI = 18942a55605SJay Foad BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE)) 190bf980930SSebastian Neubauer .addImm(CI.Length - 1); 19142a55605SJay Foad finalizeBundle(MBB, ClauseMI->getIterator(), 19242a55605SJay Foad std::next(CI.Last->getIterator())); 19342a55605SJay Foad return true; 19442a55605SJay Foad } 19542a55605SJay Foad runOnMachineFunction(MachineFunction & MF)19642a55605SJay Foad bool runOnMachineFunction(MachineFunction &MF) override { 19742a55605SJay Foad if (skipFunction(MF.getFunction())) 19842a55605SJay Foad return false; 19942a55605SJay Foad 2009e026273SJay Foad ST = &MF.getSubtarget<GCNSubtarget>(); 2019e026273SJay Foad if (!ST->hasHardClauses()) 20242a55605SJay Foad return false; 20342a55605SJay Foad 2049e026273SJay Foad const SIInstrInfo *SII = ST->getInstrInfo(); 2059e026273SJay Foad const TargetRegisterInfo *TRI = ST->getRegisterInfo(); 20642a55605SJay Foad 20742a55605SJay Foad bool Changed = false; 20842a55605SJay Foad for (auto &MBB : MF) { 20942a55605SJay Foad ClauseInfo CI; 21042a55605SJay Foad for (auto &MI : MBB) { 21142a55605SJay Foad HardClauseType Type = getHardClauseType(MI); 21242a55605SJay Foad 21342a55605SJay Foad int64_t Dummy1; 21442a55605SJay Foad bool Dummy2; 2150ed2c046Shsmahesha unsigned Dummy3; 21642a55605SJay Foad SmallVector<const MachineOperand *, 4> BaseOps; 21742a55605SJay Foad if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 2180ed2c046Shsmahesha if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2, 2190ed2c046Shsmahesha Dummy3, TRI)) { 22042a55605SJay Foad // We failed to get the base operands, so we'll never clause this 22142a55605SJay Foad // instruction with any other, so pretend it's illegal. 22242a55605SJay Foad Type = HARDCLAUSE_ILLEGAL; 22342a55605SJay Foad } 22442a55605SJay Foad } 22542a55605SJay Foad 226*ffe86e3bSJay Foad if (CI.Length == MaxInstructionsInClause || 22742a55605SJay Foad (CI.Length && Type != HARDCLAUSE_INTERNAL && 228bf980930SSebastian Neubauer Type != HARDCLAUSE_IGNORE && 22942a55605SJay Foad (Type != CI.Type || 23042a55605SJay Foad // Note that we lie to shouldClusterMemOps about the size of the 23142a55605SJay Foad // cluster. When shouldClusterMemOps is called from the machine 23242a55605SJay Foad // scheduler it limits the size of the cluster to avoid increasing 23342a55605SJay Foad // register pressure too much, but this pass runs after register 23442a55605SJay Foad // allocation so there is no need for that kind of limit. 2350ed2c046Shsmahesha !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) { 23642a55605SJay Foad // Finish the current clause. 23742a55605SJay Foad Changed |= emitClause(CI, SII); 23842a55605SJay Foad CI = ClauseInfo(); 23942a55605SJay Foad } 24042a55605SJay Foad 24142a55605SJay Foad if (CI.Length) { 24242a55605SJay Foad // Extend the current clause. 243bf980930SSebastian Neubauer if (Type != HARDCLAUSE_IGNORE) { 244bf980930SSebastian Neubauer if (Type == HARDCLAUSE_INTERNAL) { 245bf980930SSebastian Neubauer ++CI.TrailingInternalLength; 246bf980930SSebastian Neubauer } else { 24742a55605SJay Foad ++CI.Length; 248bf980930SSebastian Neubauer CI.Length += CI.TrailingInternalLength; 249bf980930SSebastian Neubauer CI.TrailingInternalLength = 0; 25042a55605SJay Foad CI.Last = &MI; 25142a55605SJay Foad CI.BaseOps = std::move(BaseOps); 25242a55605SJay Foad } 253bf980930SSebastian Neubauer } 25442a55605SJay Foad } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) { 25542a55605SJay Foad // Start a new clause. 256bf980930SSebastian Neubauer CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)}; 25742a55605SJay Foad } 25842a55605SJay Foad } 25942a55605SJay Foad 26042a55605SJay Foad // Finish the last clause in the basic block if any. 26142a55605SJay Foad if (CI.Length) 26242a55605SJay Foad Changed |= emitClause(CI, SII); 26342a55605SJay Foad } 26442a55605SJay Foad 26542a55605SJay Foad return Changed; 26642a55605SJay Foad } 26742a55605SJay Foad }; 26842a55605SJay Foad 26942a55605SJay Foad } // namespace 27042a55605SJay Foad 27142a55605SJay Foad char SIInsertHardClauses::ID = 0; 27242a55605SJay Foad 27342a55605SJay Foad char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID; 27442a55605SJay Foad 27542a55605SJay Foad INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses", 27642a55605SJay Foad false, false) 277