1 //===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Insert s_clause instructions to form hard clauses.
11 ///
12 /// Clausing load instructions can give cache coherency benefits. Before gfx10,
13 /// the hardware automatically detected "soft clauses", which were sequences of
14 /// memory instructions of the same type. In gfx10 this detection was removed,
15 /// and the s_clause instruction was introduced to explicitly mark "hard
16 /// clauses".
17 ///
18 /// It's the scheduler's job to form the clauses by putting similar memory
19 /// instructions next to each other. Our job is just to insert an s_clause
20 /// instruction to mark the start of each clause.
21 ///
22 /// Note that hard clauses are very similar to, but logically distinct from, the
23 /// groups of instructions that have to be restartable when XNACK is enabled.
24 /// The rules are slightly different in each case. For example an s_nop
25 /// instruction breaks a restartable group, but can appear in the middle of a
26 /// hard clause. (Before gfx10 there wasn't a distinction, and both were called
27 /// "soft clauses" or just "clauses".)
28 ///
29 /// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
30 /// groups, not hard clauses.
31 //
32 //===----------------------------------------------------------------------===//
33 
34 #include "AMDGPU.h"
35 #include "GCNSubtarget.h"
36 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
37 #include "llvm/ADT/SmallVector.h"
38 #include "llvm/CodeGen/MachineFunctionPass.h"
39 
40 using namespace llvm;
41 
42 #define DEBUG_TYPE "si-insert-hard-clauses"
43 
44 namespace {
45 
// Classification of a machine instruction with respect to hard clauses.
//
// The enumerator order is significant: every "real" clause type (one that can
// start or extend a clause) compares less than or equal to
// LAST_REAL_HARDCLAUSE_TYPE, and every other classification compares greater.
enum HardClauseType {
  // Memory instructions on the vector memory path: texture, buffer, global
  // and scratch accesses.
  HARDCLAUSE_VMEM = 0,
  // Flat memory instructions that are neither global nor scratch.
  HARDCLAUSE_FLAT,
  // Accesses to LDS.
  HARDCLAUSE_LDS,
  // Memory instructions on the scalar memory path.
  HARDCLAUSE_SMEM,
  // Vector ALU instructions.
  HARDCLAUSE_VALU,
  // Marks the upper bound of the real clause types listed above.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  // Internal instructions. With the exception of s_waitcnt, these may sit in
  // the middle of a hard clause.
  HARDCLAUSE_INTERNAL,
  // Meta instructions such as KILL, which do not produce any ISA.
  HARDCLAUSE_IGNORE,
  // Everything that may not appear inside a hard clause: SALU, export,
  // branch, message, GDS, s_waitcnt, and any instruction not covered by one
  // of the classifications above.
  HARDCLAUSE_ILLEGAL,
};
68 
69 class SIInsertHardClauses : public MachineFunctionPass {
70 public:
71   static char ID;
72   const GCNSubtarget *ST = nullptr;
73 
74   SIInsertHardClauses() : MachineFunctionPass(ID) {}
75 
76   void getAnalysisUsage(AnalysisUsage &AU) const override {
77     AU.setPreservesCFG();
78     MachineFunctionPass::getAnalysisUsage(AU);
79   }
80 
81   HardClauseType getHardClauseType(const MachineInstr &MI) {
82 
83     // On current architectures we only get a benefit from clausing loads.
84     if (MI.mayLoad()) {
85       if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
86         if (ST->hasNSAClauseBug()) {
87           const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
88           if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
89             return HARDCLAUSE_ILLEGAL;
90         }
91         return HARDCLAUSE_VMEM;
92       }
93       if (SIInstrInfo::isFLAT(MI))
94         return HARDCLAUSE_FLAT;
95       // TODO: LDS
96       if (SIInstrInfo::isSMRD(MI))
97         return HARDCLAUSE_SMEM;
98     }
99 
100     // Don't form VALU clauses. It's not clear what benefit they give, if any.
101 
102     // In practice s_nop is the only internal instruction we're likely to see.
103     // It's safe to treat the rest as illegal.
104     if (MI.getOpcode() == AMDGPU::S_NOP)
105       return HARDCLAUSE_INTERNAL;
106     if (MI.isMetaInstruction())
107       return HARDCLAUSE_IGNORE;
108     return HARDCLAUSE_ILLEGAL;
109   }
110 
111   // Track information about a clause as we discover it.
112   struct ClauseInfo {
113     // The type of all (non-internal) instructions in the clause.
114     HardClauseType Type = HARDCLAUSE_ILLEGAL;
115     // The first (necessarily non-internal) instruction in the clause.
116     MachineInstr *First = nullptr;
117     // The last non-internal instruction in the clause.
118     MachineInstr *Last = nullptr;
119     // The length of the clause including any internal instructions in the
120     // middle (but not at the end) of the clause.
121     unsigned Length = 0;
122     // Internal instructions at the and of a clause should not be included in
123     // the clause. Count them in TrailingInternalLength until a new memory
124     // instruction is added.
125     unsigned TrailingInternalLength = 0;
126     // The base operands of *Last.
127     SmallVector<const MachineOperand *, 4> BaseOps;
128   };
129 
130   bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
131     if (CI.First == CI.Last)
132       return false;
133     assert(CI.Length <= 64 && "Hard clause is too long!");
134 
135     auto &MBB = *CI.First->getParent();
136     auto ClauseMI =
137         BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
138             .addImm(CI.Length - 1);
139     finalizeBundle(MBB, ClauseMI->getIterator(),
140                    std::next(CI.Last->getIterator()));
141     return true;
142   }
143 
144   bool runOnMachineFunction(MachineFunction &MF) override {
145     if (skipFunction(MF.getFunction()))
146       return false;
147 
148     ST = &MF.getSubtarget<GCNSubtarget>();
149     if (!ST->hasHardClauses())
150       return false;
151 
152     const SIInstrInfo *SII = ST->getInstrInfo();
153     const TargetRegisterInfo *TRI = ST->getRegisterInfo();
154 
155     bool Changed = false;
156     for (auto &MBB : MF) {
157       ClauseInfo CI;
158       for (auto &MI : MBB) {
159         HardClauseType Type = getHardClauseType(MI);
160 
161         int64_t Dummy1;
162         bool Dummy2;
163         unsigned Dummy3;
164         SmallVector<const MachineOperand *, 4> BaseOps;
165         if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
166           if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
167                                                   Dummy3, TRI)) {
168             // We failed to get the base operands, so we'll never clause this
169             // instruction with any other, so pretend it's illegal.
170             Type = HARDCLAUSE_ILLEGAL;
171           }
172         }
173 
174         if (CI.Length == 64 ||
175             (CI.Length && Type != HARDCLAUSE_INTERNAL &&
176              Type != HARDCLAUSE_IGNORE &&
177              (Type != CI.Type ||
178               // Note that we lie to shouldClusterMemOps about the size of the
179               // cluster. When shouldClusterMemOps is called from the machine
180               // scheduler it limits the size of the cluster to avoid increasing
181               // register pressure too much, but this pass runs after register
182               // allocation so there is no need for that kind of limit.
183               !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
184           // Finish the current clause.
185           Changed |= emitClause(CI, SII);
186           CI = ClauseInfo();
187         }
188 
189         if (CI.Length) {
190           // Extend the current clause.
191           if (Type != HARDCLAUSE_IGNORE) {
192             if (Type == HARDCLAUSE_INTERNAL) {
193               ++CI.TrailingInternalLength;
194             } else {
195               ++CI.Length;
196               CI.Length += CI.TrailingInternalLength;
197               CI.TrailingInternalLength = 0;
198               CI.Last = &MI;
199               CI.BaseOps = std::move(BaseOps);
200             }
201           }
202         } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
203           // Start a new clause.
204           CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
205         }
206       }
207 
208       // Finish the last clause in the basic block if any.
209       if (CI.Length)
210         Changed |= emitClause(CI, SII);
211     }
212 
213     return Changed;
214   }
215 };
216 
217 } // namespace
218 
// Out-of-class definition of the pass's static ID member, which the
// constructor hands to MachineFunctionPass.
char SIInsertHardClauses::ID = 0;

// Reference to the pass ID exposed in namespace llvm, so the pass can be named
// from outside this file even though the class lives in an anonymous
// namespace. NOTE(review): presumably declared in AMDGPU.h -- confirm.
char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;

// Register the pass under the name given by DEBUG_TYPE with the description
// "SI Insert Hard Clauses".
// NOTE(review): the two trailing `false` arguments are presumably the
// CFG-only and is-analysis flags of INITIALIZE_PASS -- confirm.
INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
                false, false)
225