1 //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Insert s_clause instructions to form hard clauses.
11 ///
12 /// Clausing load instructions can give cache coherency benefits. Before gfx10,
13 /// the hardware automatically detected "soft clauses", which were sequences of
14 /// memory instructions of the same type. In gfx10 this detection was removed,
15 /// and the s_clause instruction was introduced to explicitly mark "hard
16 /// clauses".
17 ///
18 /// It's the scheduler's job to form the clauses by putting similar memory
19 /// instructions next to each other. Our job is just to insert an s_clause
20 /// instruction to mark the start of each clause.
21 ///
22 /// Note that hard clauses are very similar to, but logically distinct from, the
23 /// groups of instructions that have to be restartable when XNACK is enabled.
24 /// The rules are slightly different in each case. For example an s_nop
25 /// instruction breaks a restartable group, but can appear in the middle of a
26 /// hard clause. (Before gfx10 there wasn't a distinction, and both were called
27 /// "soft clauses" or just "clauses".)
28 ///
29 /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
30 /// groups, not hard clauses.
31 //
32 //===----------------------------------------------------------------------===//
33 
34 #include "AMDGPUSubtarget.h"
35 #include "SIInstrInfo.h"
36 #include "llvm/ADT/SmallVector.h"
37 
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "si-insert-hard-clauses"
41 
42 namespace {
43 
// How a single instruction relates to hard clauses. Values up to and including
// LAST_REAL_HARDCLAUSE_TYPE name the kinds of clause an instruction can belong
// to; the values after it describe instructions that belong to no clause kind.
enum HardClauseType {
  // Memory instructions of the texture, buffer, global or scratch kind.
  HARDCLAUSE_VMEM = 0,
  // Flat memory instructions, excluding the global and scratch variants.
  HARDCLAUSE_FLAT = 1,
  // Instructions accessing LDS.
  HARDCLAUSE_LDS = 2,
  // Memory instructions of the scalar kind.
  HARDCLAUSE_SMEM = 3,
  // Instructions of the VALU kind.
  HARDCLAUSE_VALU = 4,
  // Alias marking the highest-valued "real" clause type, so that range checks
  // of the form `Type <= LAST_REAL_HARDCLAUSE_TYPE` can be written.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions. These may sit in the middle of a hard clause; the
  // one exception is s_waitcnt.
  HARDCLAUSE_INTERNAL = 5,
  // Everything that must not appear inside a hard clause: SALU, export,
  // branch, message, GDS, s_waitcnt, and whatever is not covered above.
  HARDCLAUSE_ILLEGAL = 6,
};
64 
65 HardClauseType getHardClauseType(const MachineInstr &MI) {
66   // On current architectures we only get a benefit from clausing loads.
67   if (MI.mayLoad()) {
68     if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI))
69       return HARDCLAUSE_VMEM;
70     if (SIInstrInfo::isFLAT(MI))
71       return HARDCLAUSE_FLAT;
72     // TODO: LDS
73     if (SIInstrInfo::isSMRD(MI))
74       return HARDCLAUSE_SMEM;
75   }
76 
77   // Don't form VALU clauses. It's not clear what benefit they give, if any.
78 
79   // In practice s_nop is the only internal instructions we're likely to see.
80   // It's safe to treat the rest as illegal.
81   if (MI.getOpcode() == AMDGPU::S_NOP)
82     return HARDCLAUSE_INTERNAL;
83   return HARDCLAUSE_ILLEGAL;
84 }
85 
86 class SIInsertHardClauses : public MachineFunctionPass {
87 public:
88   static char ID;
89 
90   SIInsertHardClauses() : MachineFunctionPass(ID) {}
91 
92   void getAnalysisUsage(AnalysisUsage &AU) const override {
93     AU.setPreservesCFG();
94     MachineFunctionPass::getAnalysisUsage(AU);
95   }
96 
97   // Track information about a clause as we discover it.
98   struct ClauseInfo {
99     // The type of all (non-internal) instructions in the clause.
100     HardClauseType Type = HARDCLAUSE_ILLEGAL;
101     // The first (necessarily non-internal) instruction in the clause.
102     MachineInstr *First = nullptr;
103     // The last non-internal instruction in the clause.
104     MachineInstr *Last = nullptr;
105     // The length of the clause including any internal instructions in the
106     // middle.
107     unsigned Length = 0;
108     // The base operands of *Last.
109     SmallVector<const MachineOperand *, 4> BaseOps;
110   };
111 
112   bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
113     assert(CI.Length ==
114            std::distance(CI.First->getIterator(), CI.Last->getIterator()) + 1);
115     if (CI.Length < 2)
116       return false;
117     assert(CI.Length <= 64 && "Hard clause is too long!");
118 
119     auto &MBB = *CI.First->getParent();
120     auto ClauseMI =
121         BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
122             .addImm(CI.Length - 1);
123     finalizeBundle(MBB, ClauseMI->getIterator(),
124                    std::next(CI.Last->getIterator()));
125     return true;
126   }
127 
128   bool runOnMachineFunction(MachineFunction &MF) override {
129     if (skipFunction(MF.getFunction()))
130       return false;
131 
132     const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
133     if (!ST.hasHardClauses())
134       return false;
135 
136     const SIInstrInfo *SII = ST.getInstrInfo();
137     const TargetRegisterInfo *TRI = ST.getRegisterInfo();
138 
139     bool Changed = false;
140     for (auto &MBB : MF) {
141       ClauseInfo CI;
142       for (auto &MI : MBB) {
143         HardClauseType Type = getHardClauseType(MI);
144 
145         int64_t Dummy1;
146         bool Dummy2;
147         SmallVector<const MachineOperand *, 4> BaseOps;
148         if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
149           if (!SII->getMemOperandsWithOffset(MI, BaseOps, Dummy1, Dummy2,
150                                              TRI)) {
151             // We failed to get the base operands, so we'll never clause this
152             // instruction with any other, so pretend it's illegal.
153             Type = HARDCLAUSE_ILLEGAL;
154           }
155         }
156 
157         if (CI.Length == 64 ||
158             (CI.Length && Type != HARDCLAUSE_INTERNAL &&
159              (Type != CI.Type ||
160               // Note that we lie to shouldClusterMemOps about the size of the
161               // cluster. When shouldClusterMemOps is called from the machine
162               // scheduler it limits the size of the cluster to avoid increasing
163               // register pressure too much, but this pass runs after register
164               // allocation so there is no need for that kind of limit.
165               !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2)))) {
166           // Finish the current clause.
167           Changed |= emitClause(CI, SII);
168           CI = ClauseInfo();
169         }
170 
171         if (CI.Length) {
172           // Extend the current clause.
173           ++CI.Length;
174           if (Type != HARDCLAUSE_INTERNAL) {
175             CI.Last = &MI;
176             CI.BaseOps = std::move(BaseOps);
177           }
178         } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
179           // Start a new clause.
180           CI = ClauseInfo{Type, &MI, &MI, 1, std::move(BaseOps)};
181         }
182       }
183 
184       // Finish the last clause in the basic block if any.
185       if (CI.Length)
186         Changed |= emitClause(CI, SII);
187     }
188 
189     return Changed;
190   }
191 };
192 
193 } // namespace
194 
// Storage for the pass identifier. The pass constructor hands this object to
// the MachineFunctionPass base class.
char SIInsertHardClauses::ID = 0;

// Reference to the pass identifier, published in the llvm namespace so that
// code outside this file (which cannot name the file-local pass class) can
// still refer to the pass.
char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;

// Register the pass under the DEBUG_TYPE string ("si-insert-hard-clauses")
// with the human-readable name given below.
// NOTE(review): the two trailing `false` arguments are presumably the
// "CFG only" and "is analysis" flags — confirm against INITIALIZE_PASS.
INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
                false, false)
201