1 //===-- R600ClauseMergePass - Merge consecutive CF_ALU -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// R600EmitClauseMarker pass emits CFAlu instruction in a conservative maneer. 12 /// This pass is merging consecutive CFAlus where applicable. 13 /// It needs to be called after IfCvt for best results. 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPU.h" 17 #include "AMDGPUSubtarget.h" 18 #include "R600Defines.h" 19 #include "R600InstrInfo.h" 20 #include "R600MachineFunctionInfo.h" 21 #include "R600RegisterInfo.h" 22 #include "llvm/CodeGen/MachineFunctionPass.h" 23 #include "llvm/CodeGen/MachineInstrBuilder.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/raw_ostream.h" 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "r600mergeclause" 31 32 namespace { 33 34 static bool isCFAlu(const MachineInstr *MI) { 35 switch (MI->getOpcode()) { 36 case AMDGPU::CF_ALU: 37 case AMDGPU::CF_ALU_PUSH_BEFORE: 38 return true; 39 default: 40 return false; 41 } 42 } 43 44 class R600ClauseMergePass : public MachineFunctionPass { 45 46 private: 47 static char ID; 48 const R600InstrInfo *TII; 49 50 unsigned getCFAluSize(const MachineInstr *MI) const; 51 bool isCFAluEnabled(const MachineInstr *MI) const; 52 53 /// IfCvt pass can generate "disabled" ALU clause marker that need to be 54 /// removed and their content affected to the previous alu clause. 55 /// This function parse instructions after CFAlu until it find a disabled 56 /// CFAlu and merge the content, or an enabled CFAlu. 57 void cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) const; 58 59 /// Check whether LatrCFAlu can be merged into RootCFAlu and do it if 60 /// it is the case. 61 bool mergeIfPossible(MachineInstr *RootCFAlu, const MachineInstr *LatrCFAlu) 62 const; 63 64 public: 65 R600ClauseMergePass(TargetMachine &tm) : MachineFunctionPass(ID) { } 66 67 bool runOnMachineFunction(MachineFunction &MF) override; 68 69 const char *getPassName() const override; 70 }; 71 72 char R600ClauseMergePass::ID = 0; 73 74 unsigned R600ClauseMergePass::getCFAluSize(const MachineInstr *MI) const { 75 assert(isCFAlu(MI)); 76 return MI->getOperand( 77 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::COUNT)).getImm(); 78 } 79 80 bool R600ClauseMergePass::isCFAluEnabled(const MachineInstr *MI) const { 81 assert(isCFAlu(MI)); 82 return MI->getOperand( 83 TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::Enabled)).getImm(); 84 } 85 86 void R600ClauseMergePass::cleanPotentialDisabledCFAlu(MachineInstr *CFAlu) 87 const { 88 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); 89 MachineBasicBlock::iterator I = CFAlu, E = CFAlu->getParent()->end(); 90 I++; 91 do { 92 while (I!= E && !isCFAlu(I)) 93 I++; 94 if (I == E) 95 return; 96 MachineInstr *MI = I++; 97 if (isCFAluEnabled(MI)) 98 break; 99 CFAlu->getOperand(CntIdx).setImm(getCFAluSize(CFAlu) + getCFAluSize(MI)); 100 MI->eraseFromParent(); 101 } while (I != E); 102 } 103 104 bool R600ClauseMergePass::mergeIfPossible(MachineInstr *RootCFAlu, 105 const MachineInstr *LatrCFAlu) const { 106 assert(isCFAlu(RootCFAlu) && isCFAlu(LatrCFAlu)); 107 int CntIdx = TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::COUNT); 108 unsigned RootInstCount = getCFAluSize(RootCFAlu), 109 LaterInstCount = getCFAluSize(LatrCFAlu); 110 unsigned CumuledInsts = RootInstCount + LaterInstCount; 111 if (CumuledInsts >= TII->getMaxAlusPerClause()) { 112 DEBUG(dbgs() << "Excess inst counts\n"); 113 return false; 114 } 115 if (RootCFAlu->getOpcode() == AMDGPU::CF_ALU_PUSH_BEFORE) 116 return false; 117 // Is KCache Bank 0 compatible ? 118 int Mode0Idx = 119 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE0); 120 int KBank0Idx = 121 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK0); 122 int KBank0LineIdx = 123 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR0); 124 if (LatrCFAlu->getOperand(Mode0Idx).getImm() && 125 RootCFAlu->getOperand(Mode0Idx).getImm() && 126 (LatrCFAlu->getOperand(KBank0Idx).getImm() != 127 RootCFAlu->getOperand(KBank0Idx).getImm() || 128 LatrCFAlu->getOperand(KBank0LineIdx).getImm() != 129 RootCFAlu->getOperand(KBank0LineIdx).getImm())) { 130 DEBUG(dbgs() << "Wrong KC0\n"); 131 return false; 132 } 133 // Is KCache Bank 1 compatible ? 134 int Mode1Idx = 135 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_MODE1); 136 int KBank1Idx = 137 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_BANK1); 138 int KBank1LineIdx = 139 TII->getOperandIdx(AMDGPU::CF_ALU, AMDGPU::OpName::KCACHE_ADDR1); 140 if (LatrCFAlu->getOperand(Mode1Idx).getImm() && 141 RootCFAlu->getOperand(Mode1Idx).getImm() && 142 (LatrCFAlu->getOperand(KBank1Idx).getImm() != 143 RootCFAlu->getOperand(KBank1Idx).getImm() || 144 LatrCFAlu->getOperand(KBank1LineIdx).getImm() != 145 RootCFAlu->getOperand(KBank1LineIdx).getImm())) { 146 DEBUG(dbgs() << "Wrong KC0\n"); 147 return false; 148 } 149 if (LatrCFAlu->getOperand(Mode0Idx).getImm()) { 150 RootCFAlu->getOperand(Mode0Idx).setImm( 151 LatrCFAlu->getOperand(Mode0Idx).getImm()); 152 RootCFAlu->getOperand(KBank0Idx).setImm( 153 LatrCFAlu->getOperand(KBank0Idx).getImm()); 154 RootCFAlu->getOperand(KBank0LineIdx).setImm( 155 LatrCFAlu->getOperand(KBank0LineIdx).getImm()); 156 } 157 if (LatrCFAlu->getOperand(Mode1Idx).getImm()) { 158 RootCFAlu->getOperand(Mode1Idx).setImm( 159 LatrCFAlu->getOperand(Mode1Idx).getImm()); 160 RootCFAlu->getOperand(KBank1Idx).setImm( 161 LatrCFAlu->getOperand(KBank1Idx).getImm()); 162 RootCFAlu->getOperand(KBank1LineIdx).setImm( 163 LatrCFAlu->getOperand(KBank1LineIdx).getImm()); 164 } 165 RootCFAlu->getOperand(CntIdx).setImm(CumuledInsts); 166 RootCFAlu->setDesc(TII->get(LatrCFAlu->getOpcode())); 167 return true; 168 } 169 170 bool R600ClauseMergePass::runOnMachineFunction(MachineFunction &MF) { 171 TII = static_cast<const R600InstrInfo *>(MF.getSubtarget().getInstrInfo()); 172 for (MachineFunction::iterator BB = MF.begin(), BB_E = MF.end(); 173 BB != BB_E; ++BB) { 174 MachineBasicBlock &MBB = *BB; 175 MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 176 MachineBasicBlock::iterator LatestCFAlu = E; 177 while (I != E) { 178 MachineInstr *MI = I++; 179 if ((!TII->canBeConsideredALU(MI) && !isCFAlu(MI)) || 180 TII->mustBeLastInClause(MI->getOpcode())) 181 LatestCFAlu = E; 182 if (!isCFAlu(MI)) 183 continue; 184 cleanPotentialDisabledCFAlu(MI); 185 186 if (LatestCFAlu != E && mergeIfPossible(LatestCFAlu, MI)) { 187 MI->eraseFromParent(); 188 } else { 189 assert(MI->getOperand(8).getImm() && "CF ALU instruction disabled"); 190 LatestCFAlu = MI; 191 } 192 } 193 } 194 return false; 195 } 196 197 const char *R600ClauseMergePass::getPassName() const { 198 return "R600 Merge Clause Markers Pass"; 199 } 200 201 } // end anonymous namespace 202 203 204 llvm::FunctionPass *llvm::createR600ClauseMergePass(TargetMachine &TM) { 205 return new R600ClauseMergePass(TM); 206 } 207