142a55605SJay Foad //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
242a55605SJay Foad //
342a55605SJay Foad // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
442a55605SJay Foad // See https://llvm.org/LICENSE.txt for license information.
542a55605SJay Foad // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
642a55605SJay Foad //
742a55605SJay Foad //===----------------------------------------------------------------------===//
842a55605SJay Foad //
942a55605SJay Foad /// \file
1042a55605SJay Foad /// Insert s_clause instructions to form hard clauses.
1142a55605SJay Foad ///
1242a55605SJay Foad /// Clausing load instructions can give cache coherency benefits. Before gfx10,
1342a55605SJay Foad /// the hardware automatically detected "soft clauses", which were sequences of
1442a55605SJay Foad /// memory instructions of the same type. In gfx10 this detection was removed,
1542a55605SJay Foad /// and the s_clause instruction was introduced to explicitly mark "hard
1642a55605SJay Foad /// clauses".
1742a55605SJay Foad ///
1842a55605SJay Foad /// It's the scheduler's job to form the clauses by putting similar memory
1942a55605SJay Foad /// instructions next to each other. Our job is just to insert an s_clause
2042a55605SJay Foad /// instruction to mark the start of each clause.
2142a55605SJay Foad ///
2242a55605SJay Foad /// Note that hard clauses are very similar to, but logically distinct from, the
2342a55605SJay Foad /// groups of instructions that have to be restartable when XNACK is enabled.
2442a55605SJay Foad /// The rules are slightly different in each case. For example an s_nop
2542a55605SJay Foad /// instruction breaks a restartable group, but can appear in the middle of a
2642a55605SJay Foad /// hard clause. (Before gfx10 there wasn't a distinction, and both were called
2742a55605SJay Foad /// "soft clauses" or just "clauses".)
2842a55605SJay Foad ///
2942a55605SJay Foad /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
3042a55605SJay Foad /// groups, not hard clauses.
3142a55605SJay Foad //
3242a55605SJay Foad //===----------------------------------------------------------------------===//
3342a55605SJay Foad 
346a87e9b0Sdfukalov #include "AMDGPU.h"
35560d7e04Sdfukalov #include "GCNSubtarget.h"
36560d7e04Sdfukalov #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
3742a55605SJay Foad #include "llvm/ADT/SmallVector.h"
38989f1c72Sserge-sans-paille #include "llvm/CodeGen/MachineFunctionPass.h"
3942a55605SJay Foad 
4042a55605SJay Foad using namespace llvm;
4142a55605SJay Foad 
4242a55605SJay Foad #define DEBUG_TYPE "si-insert-hard-clauses"
4342a55605SJay Foad 
4442a55605SJay Foad namespace {
4542a55605SJay Foad 
// Upper bound on the number of instructions in a single hard clause. The
// s_clause instruction has room to encode a length of 64, but the hardware
// documentation (at least for GFX11) gives 63 as the largest permitted value.
constexpr unsigned MaxInstructionsInClause = 63u;
50*ffe86e3bSJay Foad 
// Classification of an instruction with respect to hard clauses.
//
// The order of the enumerators matters: users compare a type against
// LAST_REAL_HARDCLAUSE_TYPE with <=, so every "real" clause type must come
// before that marker and every special classification after it.
enum HardClauseType {
  // --- GFX10 only ---

  // Texture, buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM,
  // Flat memory instructions that are neither global nor scratch.
  HARDCLAUSE_FLAT,

  // --- GFX11 only ---

  // Texture memory instructions, split by kind of access.
  HARDCLAUSE_MIMG_LOAD,
  HARDCLAUSE_MIMG_STORE,
  HARDCLAUSE_MIMG_ATOMIC,
  HARDCLAUSE_MIMG_SAMPLE,
  // Buffer, global or scratch memory instructions, split by kind of access.
  HARDCLAUSE_VMEM_LOAD,
  HARDCLAUSE_VMEM_STORE,
  HARDCLAUSE_VMEM_ATOMIC,
  // Flat (neither global nor scratch) memory instructions, split by kind of
  // access.
  HARDCLAUSE_FLAT_LOAD,
  HARDCLAUSE_FLAT_STORE,
  HARDCLAUSE_FLAT_ATOMIC,
  // BVH instructions.
  HARDCLAUSE_BVH,

  // --- Shared between generations ---

  // Instructions that access LDS.
  HARDCLAUSE_LDS,
  // Scalar memory instructions.
  HARDCLAUSE_SMEM,
  // VALU instructions.
  HARDCLAUSE_VALU,
  // Marker for the last type that names an actual kind of clause.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions. With the exception of s_waitcnt these may sit in
  // the middle of a hard clause.
  HARDCLAUSE_INTERNAL,
  // Meta instructions such as KILL, which produce no ISA at all.
  HARDCLAUSE_IGNORE,
  // Everything that must not appear in a hard clause: SALU, export, branch,
  // message, GDS, s_waitcnt and any instruction not covered above.
  HARDCLAUSE_ILLEGAL,
};
9642a55605SJay Foad 
979e026273SJay Foad class SIInsertHardClauses : public MachineFunctionPass {
989e026273SJay Foad public:
999e026273SJay Foad   static char ID;
1009e026273SJay Foad   const GCNSubtarget *ST = nullptr;
1019e026273SJay Foad 
SIInsertHardClauses()1029e026273SJay Foad   SIInsertHardClauses() : MachineFunctionPass(ID) {}
1039e026273SJay Foad 
getAnalysisUsage(AnalysisUsage & AU) const1049e026273SJay Foad   void getAnalysisUsage(AnalysisUsage &AU) const override {
1059e026273SJay Foad     AU.setPreservesCFG();
1069e026273SJay Foad     MachineFunctionPass::getAnalysisUsage(AU);
1079e026273SJay Foad   }
1089e026273SJay Foad 
getHardClauseType(const MachineInstr & MI)10942a55605SJay Foad   HardClauseType getHardClauseType(const MachineInstr &MI) {
110*ffe86e3bSJay Foad     if (MI.mayLoad() || (MI.mayStore() && ST->shouldClusterStores())) {
111*ffe86e3bSJay Foad       if (ST->getGeneration() == AMDGPUSubtarget::GFX10) {
1129cf6ff7aSCarl Ritson         if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
1139cf6ff7aSCarl Ritson           if (ST->hasNSAClauseBug()) {
1149cf6ff7aSCarl Ritson             const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
1159cf6ff7aSCarl Ritson             if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
1169cf6ff7aSCarl Ritson               return HARDCLAUSE_ILLEGAL;
1179cf6ff7aSCarl Ritson           }
11842a55605SJay Foad           return HARDCLAUSE_VMEM;
1199cf6ff7aSCarl Ritson         }
12042a55605SJay Foad         if (SIInstrInfo::isFLAT(MI))
12142a55605SJay Foad           return HARDCLAUSE_FLAT;
122*ffe86e3bSJay Foad       } else {
123*ffe86e3bSJay Foad         assert(ST->getGeneration() >= AMDGPUSubtarget::GFX11);
124*ffe86e3bSJay Foad         if (SIInstrInfo::isMIMG(MI)) {
125*ffe86e3bSJay Foad           const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
126*ffe86e3bSJay Foad           const AMDGPU::MIMGBaseOpcodeInfo *BaseInfo =
127*ffe86e3bSJay Foad               AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
128*ffe86e3bSJay Foad           if (BaseInfo->BVH)
129*ffe86e3bSJay Foad             return HARDCLAUSE_BVH;
130*ffe86e3bSJay Foad           if (BaseInfo->Sampler)
131*ffe86e3bSJay Foad             return HARDCLAUSE_MIMG_SAMPLE;
132*ffe86e3bSJay Foad           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_MIMG_ATOMIC
133*ffe86e3bSJay Foad                                               : HARDCLAUSE_MIMG_LOAD
134*ffe86e3bSJay Foad                               : HARDCLAUSE_MIMG_STORE;
135*ffe86e3bSJay Foad         }
136*ffe86e3bSJay Foad         if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
137*ffe86e3bSJay Foad           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_VMEM_ATOMIC
138*ffe86e3bSJay Foad                                               : HARDCLAUSE_VMEM_LOAD
139*ffe86e3bSJay Foad                               : HARDCLAUSE_VMEM_STORE;
140*ffe86e3bSJay Foad         }
141*ffe86e3bSJay Foad         if (SIInstrInfo::isFLAT(MI)) {
142*ffe86e3bSJay Foad           return MI.mayLoad() ? MI.mayStore() ? HARDCLAUSE_FLAT_ATOMIC
143*ffe86e3bSJay Foad                                               : HARDCLAUSE_FLAT_LOAD
144*ffe86e3bSJay Foad                               : HARDCLAUSE_FLAT_STORE;
145*ffe86e3bSJay Foad         }
146*ffe86e3bSJay Foad       }
14742a55605SJay Foad       // TODO: LDS
14842a55605SJay Foad       if (SIInstrInfo::isSMRD(MI))
14942a55605SJay Foad         return HARDCLAUSE_SMEM;
15042a55605SJay Foad     }
15142a55605SJay Foad 
15242a55605SJay Foad     // Don't form VALU clauses. It's not clear what benefit they give, if any.
15342a55605SJay Foad 
15410c10f24SJay Foad     // In practice s_nop is the only internal instruction we're likely to see.
15542a55605SJay Foad     // It's safe to treat the rest as illegal.
15642a55605SJay Foad     if (MI.getOpcode() == AMDGPU::S_NOP)
15742a55605SJay Foad       return HARDCLAUSE_INTERNAL;
158bf980930SSebastian Neubauer     if (MI.isMetaInstruction())
159bf980930SSebastian Neubauer       return HARDCLAUSE_IGNORE;
16042a55605SJay Foad     return HARDCLAUSE_ILLEGAL;
16142a55605SJay Foad   }
16242a55605SJay Foad 
16342a55605SJay Foad   // Track information about a clause as we discover it.
16442a55605SJay Foad   struct ClauseInfo {
16542a55605SJay Foad     // The type of all (non-internal) instructions in the clause.
16642a55605SJay Foad     HardClauseType Type = HARDCLAUSE_ILLEGAL;
16742a55605SJay Foad     // The first (necessarily non-internal) instruction in the clause.
16842a55605SJay Foad     MachineInstr *First = nullptr;
16942a55605SJay Foad     // The last non-internal instruction in the clause.
17042a55605SJay Foad     MachineInstr *Last = nullptr;
17142a55605SJay Foad     // The length of the clause including any internal instructions in the
172bf980930SSebastian Neubauer     // middle (but not at the end) of the clause.
17342a55605SJay Foad     unsigned Length = 0;
174bf980930SSebastian Neubauer     // Internal instructions at the and of a clause should not be included in
175bf980930SSebastian Neubauer     // the clause. Count them in TrailingInternalLength until a new memory
176bf980930SSebastian Neubauer     // instruction is added.
177bf980930SSebastian Neubauer     unsigned TrailingInternalLength = 0;
17842a55605SJay Foad     // The base operands of *Last.
17942a55605SJay Foad     SmallVector<const MachineOperand *, 4> BaseOps;
18042a55605SJay Foad   };
18142a55605SJay Foad 
emitClause(const ClauseInfo & CI,const SIInstrInfo * SII)18242a55605SJay Foad   bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
183bf980930SSebastian Neubauer     if (CI.First == CI.Last)
18442a55605SJay Foad       return false;
185*ffe86e3bSJay Foad     assert(CI.Length <= MaxInstructionsInClause && "Hard clause is too long!");
18642a55605SJay Foad 
18742a55605SJay Foad     auto &MBB = *CI.First->getParent();
18842a55605SJay Foad     auto ClauseMI =
18942a55605SJay Foad         BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
190bf980930SSebastian Neubauer             .addImm(CI.Length - 1);
19142a55605SJay Foad     finalizeBundle(MBB, ClauseMI->getIterator(),
19242a55605SJay Foad                    std::next(CI.Last->getIterator()));
19342a55605SJay Foad     return true;
19442a55605SJay Foad   }
19542a55605SJay Foad 
runOnMachineFunction(MachineFunction & MF)19642a55605SJay Foad   bool runOnMachineFunction(MachineFunction &MF) override {
19742a55605SJay Foad     if (skipFunction(MF.getFunction()))
19842a55605SJay Foad       return false;
19942a55605SJay Foad 
2009e026273SJay Foad     ST = &MF.getSubtarget<GCNSubtarget>();
2019e026273SJay Foad     if (!ST->hasHardClauses())
20242a55605SJay Foad       return false;
20342a55605SJay Foad 
2049e026273SJay Foad     const SIInstrInfo *SII = ST->getInstrInfo();
2059e026273SJay Foad     const TargetRegisterInfo *TRI = ST->getRegisterInfo();
20642a55605SJay Foad 
20742a55605SJay Foad     bool Changed = false;
20842a55605SJay Foad     for (auto &MBB : MF) {
20942a55605SJay Foad       ClauseInfo CI;
21042a55605SJay Foad       for (auto &MI : MBB) {
21142a55605SJay Foad         HardClauseType Type = getHardClauseType(MI);
21242a55605SJay Foad 
21342a55605SJay Foad         int64_t Dummy1;
21442a55605SJay Foad         bool Dummy2;
2150ed2c046Shsmahesha         unsigned Dummy3;
21642a55605SJay Foad         SmallVector<const MachineOperand *, 4> BaseOps;
21742a55605SJay Foad         if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
2180ed2c046Shsmahesha           if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
2190ed2c046Shsmahesha                                                   Dummy3, TRI)) {
22042a55605SJay Foad             // We failed to get the base operands, so we'll never clause this
22142a55605SJay Foad             // instruction with any other, so pretend it's illegal.
22242a55605SJay Foad             Type = HARDCLAUSE_ILLEGAL;
22342a55605SJay Foad           }
22442a55605SJay Foad         }
22542a55605SJay Foad 
226*ffe86e3bSJay Foad         if (CI.Length == MaxInstructionsInClause ||
22742a55605SJay Foad             (CI.Length && Type != HARDCLAUSE_INTERNAL &&
228bf980930SSebastian Neubauer              Type != HARDCLAUSE_IGNORE &&
22942a55605SJay Foad              (Type != CI.Type ||
23042a55605SJay Foad               // Note that we lie to shouldClusterMemOps about the size of the
23142a55605SJay Foad               // cluster. When shouldClusterMemOps is called from the machine
23242a55605SJay Foad               // scheduler it limits the size of the cluster to avoid increasing
23342a55605SJay Foad               // register pressure too much, but this pass runs after register
23442a55605SJay Foad               // allocation so there is no need for that kind of limit.
2350ed2c046Shsmahesha               !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
23642a55605SJay Foad           // Finish the current clause.
23742a55605SJay Foad           Changed |= emitClause(CI, SII);
23842a55605SJay Foad           CI = ClauseInfo();
23942a55605SJay Foad         }
24042a55605SJay Foad 
24142a55605SJay Foad         if (CI.Length) {
24242a55605SJay Foad           // Extend the current clause.
243bf980930SSebastian Neubauer           if (Type != HARDCLAUSE_IGNORE) {
244bf980930SSebastian Neubauer             if (Type == HARDCLAUSE_INTERNAL) {
245bf980930SSebastian Neubauer               ++CI.TrailingInternalLength;
246bf980930SSebastian Neubauer             } else {
24742a55605SJay Foad               ++CI.Length;
248bf980930SSebastian Neubauer               CI.Length += CI.TrailingInternalLength;
249bf980930SSebastian Neubauer               CI.TrailingInternalLength = 0;
25042a55605SJay Foad               CI.Last = &MI;
25142a55605SJay Foad               CI.BaseOps = std::move(BaseOps);
25242a55605SJay Foad             }
253bf980930SSebastian Neubauer           }
25442a55605SJay Foad         } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
25542a55605SJay Foad           // Start a new clause.
256bf980930SSebastian Neubauer           CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
25742a55605SJay Foad         }
25842a55605SJay Foad       }
25942a55605SJay Foad 
26042a55605SJay Foad       // Finish the last clause in the basic block if any.
26142a55605SJay Foad       if (CI.Length)
26242a55605SJay Foad         Changed |= emitClause(CI, SII);
26342a55605SJay Foad     }
26442a55605SJay Foad 
26542a55605SJay Foad     return Changed;
26642a55605SJay Foad   }
26742a55605SJay Foad };
26842a55605SJay Foad 
26942a55605SJay Foad } // namespace
27042a55605SJay Foad 
27142a55605SJay Foad char SIInsertHardClauses::ID = 0;
27242a55605SJay Foad 
27342a55605SJay Foad char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
27442a55605SJay Foad 
27542a55605SJay Foad INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
27642a55605SJay Foad                 false, false)
277