1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// R600EmitClauseMarker pass emits CFAlu instructions in a conservative manner.
11 /// This pass is merging consecutive CFAlus where applicable.
12 /// It needs to be called after IfCvt for best results.
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "AMDGPUSubtarget.h"
17 
18 using namespace llvm;
19 
20 #define DEBUG_TYPE "r600mergeclause"
21 
22 namespace {
23 
24 static bool isCFAlu(const MachineInstr &MI) {
25   switch (MI.getOpcode()) {
26   case R600::CF_ALU:
27   case R600::CF_ALU_PUSH_BEFORE:
28     return true;
29   default:
30     return false;
31   }
32 }
33 
34 class R600ClauseMergePass : public MachineFunctionPass {
35 
36 private:
37   const R600InstrInfo *TII;
38 
39   unsigned getCFAluSize(const MachineInstr &MI) const;
40   bool isCFAluEnabled(const MachineInstr &MI) const;
41 
42   /// IfCvt pass can generate "disabled" ALU clause marker that need to be
43   /// removed and their content affected to the previous alu clause.
44   /// This function parse instructions after CFAlu until it find a disabled
45   /// CFAlu and merge the content, or an enabled CFAlu.
46   void cleanPotentialDisabledCFAlu(MachineInstr &CFAlu) const;
47 
48   /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if
49   /// it is the case.
50   bool mergeIfPossible(MachineInstr &RootCFAlu,
51                        const MachineInstr &LatrCFAlu) const;
52 
53 public:
54   static char ID;
55 
56   R600ClauseMergePass() : MachineFunctionPass(ID) { }
57 
58   bool runOnMachineFunction(MachineFunction &MF) override;
59 
60   StringRef getPassName() const override;
61 };
62 
63 } // end anonymous namespace
64 
65 INITIALIZE_PASS_BEGIN(R600ClauseMergePass, DEBUG_TYPE,
66                       "R600 Clause Merge", false, false)
67 INITIALIZE_PASS_END(R600ClauseMergePass, DEBUG_TYPE,
68                     "R600 Clause Merge", false, false)
69 
70 char R600ClauseMergePass::ID = 0;
71 
72 char &llvm::R600ClauseMergePassID = R600ClauseMergePass::ID;
73 
74 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr &MI) const {
75   assert(isCFAlu(MI));
76   return MI
77       .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::COUNT))
78       .getImm();
79 }
80 
81 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr &MI) const {
82   assert(isCFAlu(MI));
83   return MI
84       .getOperand(TII->getOperandIdx(MI.getOpcode(), R600::OpName::Enabled))
85       .getImm();
86 }
87 
88 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(
89     MachineInstr &CFAlu) const {
90   int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
91   MachineBasicBlock::iterator I = CFAlu, E = CFAlu.getParent()->end();
92   I++;
93   do {
94     while (I != E && !isCFAlu(*I))
95       I++;
96     if (I == E)
97       return;
98     MachineInstr &MI = *I++;
99     if (isCFAluEnabled(MI))
100       break;
101     CFAlu.getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI));
102     MI.eraseFromParent();
103   } while (I != E);
104 }
105 
106 bool R600ClauseMergePass::mergeIfPossible(MachineInstr &RootCFAlu,
107                                           const MachineInstr &LatrCFAlu) const {
108   assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu));
109   int CntIdx = TII->getOperandIdx(R600::CF_ALU, R600::OpName::COUNT);
110   unsigned RootInstCount = getCFAluSize(RootCFAlu),
111       LaterInstCount = getCFAluSize(LatrCFAlu);
112   unsigned CumuledInsts = RootInstCount + LaterInstCount;
113   if (CumuledInsts >= TII->getMaxAlusPerClause()) {
114     LLVM_DEBUG(dbgs() << "Excess inst counts\n");
115     return false;
116   }
117   if (RootCFAlu.getOpcode() == R600::CF_ALU_PUSH_BEFORE)
118     return false;
119   // Is KCache Bank 0 compatible ?
120   int Mode0Idx =
121       TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE0);
122   int KBank0Idx =
123       TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK0);
124   int KBank0LineIdx =
125       TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR0);
126   if (LatrCFAlu.getOperand(Mode0Idx).getImm() &&
127       RootCFAlu.getOperand(Mode0Idx).getImm() &&
128       (LatrCFAlu.getOperand(KBank0Idx).getImm() !=
129            RootCFAlu.getOperand(KBank0Idx).getImm() ||
130        LatrCFAlu.getOperand(KBank0LineIdx).getImm() !=
131            RootCFAlu.getOperand(KBank0LineIdx).getImm())) {
132     LLVM_DEBUG(dbgs() << "Wrong KC0\n");
133     return false;
134   }
135   // Is KCache Bank 1 compatible ?
136   int Mode1Idx =
137       TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_MODE1);
138   int KBank1Idx =
139       TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_BANK1);
140   int KBank1LineIdx =
141       TII->getOperandIdx(R600::CF_ALU, R600::OpName::KCACHE_ADDR1);
142   if (LatrCFAlu.getOperand(Mode1Idx).getImm() &&
143       RootCFAlu.getOperand(Mode1Idx).getImm() &&
144       (LatrCFAlu.getOperand(KBank1Idx).getImm() !=
145            RootCFAlu.getOperand(KBank1Idx).getImm() ||
146        LatrCFAlu.getOperand(KBank1LineIdx).getImm() !=
147            RootCFAlu.getOperand(KBank1LineIdx).getImm())) {
148     LLVM_DEBUG(dbgs() << "Wrong KC0\n");
149     return false;
150   }
151   if (LatrCFAlu.getOperand(Mode0Idx).getImm()) {
152     RootCFAlu.getOperand(Mode0Idx).setImm(
153         LatrCFAlu.getOperand(Mode0Idx).getImm());
154     RootCFAlu.getOperand(KBank0Idx).setImm(
155         LatrCFAlu.getOperand(KBank0Idx).getImm());
156     RootCFAlu.getOperand(KBank0LineIdx)
157         .setImm(LatrCFAlu.getOperand(KBank0LineIdx).getImm());
158   }
159   if (LatrCFAlu.getOperand(Mode1Idx).getImm()) {
160     RootCFAlu.getOperand(Mode1Idx).setImm(
161         LatrCFAlu.getOperand(Mode1Idx).getImm());
162     RootCFAlu.getOperand(KBank1Idx).setImm(
163         LatrCFAlu.getOperand(KBank1Idx).getImm());
164     RootCFAlu.getOperand(KBank1LineIdx)
165         .setImm(LatrCFAlu.getOperand(KBank1LineIdx).getImm());
166   }
167   RootCFAlu.getOperand(CntIdx).setImm(CumuledInsts);
168   RootCFAlu.setDesc(TII->get(LatrCFAlu.getOpcode()));
169   return true;
170 }
171 
172 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) {
173   if (skipFunction(MF.getFunction()))
174     return false;
175 
176   const R600Subtarget &ST = MF.getSubtarget<R600Subtarget>();
177   TII = ST.getInstrInfo();
178 
179   for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end();
180                                                   BB != BB_E; ++BB) {
181     MachineBasicBlock &MBB = *BB;
182     MachineBasicBlock::iterator I = MBB.begin(),  E = MBB.end();
183     MachineBasicBlock::iterator LatestCFAlu = E;
184     while (I != E) {
185       MachineInstr &MI = *I++;
186       if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) ||
187           TII->mustBeLastInClause(MI.getOpcode()))
188         LatestCFAlu = E;
189       if (!isCFAlu(MI))
190         continue;
191       cleanPotentialDisabledCFAlu(MI);
192 
193       if (LatestCFAlu != E && mergeIfPossible(*LatestCFAlu, MI)) {
194         MI.eraseFromParent();
195       } else {
196         assert(MI.getOperand(8).getImm() && "CF ALU instruction disabled");
197         LatestCFAlu = MI;
198       }
199     }
200   }
201   return false;
202 }
203 
204 StringRef R600ClauseMergePass::getPassName() const {
205   return "R600 Merge Clause Markers Pass";
206 }
207 
208 llvm::FunctionPass *llvm::createR600ClauseMergePass() {
209   return new R600ClauseMergePass();
210 }
211