//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
///    ...
///    BRANCH %cond BB1, BB2
///  BB1:
///    %vreg2 <vgpr> = VECTOR_INST
///    %vreg3 <vsrc> = COPY %vreg2 <vgpr>
///  BB2:
///    %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///    %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
///
///
/// The coalescer will begin at BB0 and eliminate its copy, then the resulting
/// code will look like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///    ...
///    BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is now forced to constrain the register class of %vreg3 to
/// <sgpr> so we end up with final code like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///    ...
///    BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
59 /// 60 /// In order to avoid this problem, this pass searches for PHI instructions 61 /// which define a <vsrc> register and constrains its definition class to 62 /// <vgpr> if the user of the PHI's definition register is a vector instruction. 63 /// If the PHI's definition class is constrained to <vgpr> then the coalescer 64 /// will be unable to perform the COPY removal from the above example which 65 /// ultimately led to the creation of an illegal COPY. 66 //===----------------------------------------------------------------------===// 67 68 #include "AMDGPU.h" 69 #include "AMDGPUSubtarget.h" 70 #include "SIInstrInfo.h" 71 #include "llvm/CodeGen/MachineFunctionPass.h" 72 #include "llvm/CodeGen/MachineInstrBuilder.h" 73 #include "llvm/CodeGen/MachineRegisterInfo.h" 74 #include "llvm/Support/Debug.h" 75 #include "llvm/Support/raw_ostream.h" 76 #include "llvm/Target/TargetMachine.h" 77 78 using namespace llvm; 79 80 #define DEBUG_TYPE "sgpr-copies" 81 82 namespace { 83 84 class SIFixSGPRCopies : public MachineFunctionPass { 85 86 private: 87 static char ID; 88 const TargetRegisterClass *inferRegClassFromUses(const SIRegisterInfo *TRI, 89 const MachineRegisterInfo &MRI, 90 unsigned Reg, 91 unsigned SubReg) const; 92 const TargetRegisterClass *inferRegClassFromDef(const SIRegisterInfo *TRI, 93 const MachineRegisterInfo &MRI, 94 unsigned Reg, 95 unsigned SubReg) const; 96 bool isVGPRToSGPRCopy(const MachineInstr &Copy, const SIRegisterInfo *TRI, 97 const MachineRegisterInfo &MRI) const; 98 99 public: 100 SIFixSGPRCopies(TargetMachine &tm) : MachineFunctionPass(ID) { } 101 102 bool runOnMachineFunction(MachineFunction &MF) override; 103 104 const char *getPassName() const override { 105 return "SI Fix SGPR copies"; 106 } 107 108 void getAnalysisUsage(AnalysisUsage &AU) const override { 109 AU.setPreservesCFG(); 110 MachineFunctionPass::getAnalysisUsage(AU); 111 } 112 }; 113 114 } // End anonymous namespace 115 116 char SIFixSGPRCopies::ID = 0; 117 118 FunctionPass 
*llvm::createSIFixSGPRCopiesPass(TargetMachine &tm) { 119 return new SIFixSGPRCopies(tm); 120 } 121 122 static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) { 123 const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo(); 124 for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) { 125 if (!MI.getOperand(i).isReg() || 126 !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg())) 127 continue; 128 129 if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg()))) 130 return true; 131 } 132 return false; 133 } 134 135 /// This functions walks the use list of Reg until it finds an Instruction 136 /// that isn't a COPY returns the register class of that instruction. 137 /// \return The register defined by the first non-COPY instruction. 138 const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromUses( 139 const SIRegisterInfo *TRI, 140 const MachineRegisterInfo &MRI, 141 unsigned Reg, 142 unsigned SubReg) const { 143 144 const TargetRegisterClass *RC 145 = TargetRegisterInfo::isVirtualRegister(Reg) ? 
146 MRI.getRegClass(Reg) : 147 TRI->getPhysRegClass(Reg); 148 149 RC = TRI->getSubRegClass(RC, SubReg); 150 for (MachineRegisterInfo::use_instr_iterator 151 I = MRI.use_instr_begin(Reg), E = MRI.use_instr_end(); I != E; ++I) { 152 switch (I->getOpcode()) { 153 case AMDGPU::COPY: 154 RC = TRI->getCommonSubClass(RC, inferRegClassFromUses(TRI, MRI, 155 I->getOperand(0).getReg(), 156 I->getOperand(0).getSubReg())); 157 break; 158 } 159 } 160 161 return RC; 162 } 163 164 const TargetRegisterClass *SIFixSGPRCopies::inferRegClassFromDef( 165 const SIRegisterInfo *TRI, 166 const MachineRegisterInfo &MRI, 167 unsigned Reg, 168 unsigned SubReg) const { 169 if (!TargetRegisterInfo::isVirtualRegister(Reg)) { 170 const TargetRegisterClass *RC = TRI->getPhysRegClass(Reg); 171 return TRI->getSubRegClass(RC, SubReg); 172 } 173 MachineInstr *Def = MRI.getVRegDef(Reg); 174 if (Def->getOpcode() != AMDGPU::COPY) { 175 return TRI->getSubRegClass(MRI.getRegClass(Reg), SubReg); 176 } 177 178 return inferRegClassFromDef(TRI, MRI, Def->getOperand(1).getReg(), 179 Def->getOperand(1).getSubReg()); 180 } 181 182 bool SIFixSGPRCopies::isVGPRToSGPRCopy(const MachineInstr &Copy, 183 const SIRegisterInfo *TRI, 184 const MachineRegisterInfo &MRI) const { 185 186 unsigned DstReg = Copy.getOperand(0).getReg(); 187 unsigned SrcReg = Copy.getOperand(1).getReg(); 188 unsigned SrcSubReg = Copy.getOperand(1).getSubReg(); 189 190 if (!TargetRegisterInfo::isVirtualRegister(DstReg)) { 191 // If the destination register is a physical register there isn't really 192 // much we can do to fix this. 
193 return false; 194 } 195 196 if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) 197 return false; 198 199 const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg); 200 const TargetRegisterClass *SrcRC 201 = TRI->getSubRegClass(MRI.getRegClass(SrcReg), SrcSubReg); 202 return TRI->isSGPRClass(DstRC) && TRI->hasVGPRs(SrcRC); 203 } 204 205 bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) { 206 MachineRegisterInfo &MRI = MF.getRegInfo(); 207 const SIRegisterInfo *TRI = 208 static_cast<const SIRegisterInfo *>(MF.getSubtarget().getRegisterInfo()); 209 const SIInstrInfo *TII = 210 static_cast<const SIInstrInfo *>(MF.getSubtarget().getInstrInfo()); 211 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 212 BI != BE; ++BI) { 213 214 MachineBasicBlock &MBB = *BI; 215 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 216 I != E; ++I) { 217 MachineInstr &MI = *I; 218 219 switch (MI.getOpcode()) { 220 default: 221 continue; 222 case AMDGPU::COPY: { 223 if (isVGPRToSGPRCopy(MI, TRI, MRI)) { 224 DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI); 225 TII->moveToVALU(MI); 226 } 227 228 break; 229 } 230 case AMDGPU::PHI: { 231 DEBUG(dbgs() << "Fixing PHI: " << MI); 232 233 for (unsigned i = 1; i < MI.getNumOperands(); i += 2) { 234 const MachineOperand &Op = MI.getOperand(i); 235 unsigned Reg = Op.getReg(); 236 const TargetRegisterClass *RC 237 = inferRegClassFromDef(TRI, MRI, Reg, Op.getSubReg()); 238 239 MRI.constrainRegClass(Op.getReg(), RC); 240 } 241 unsigned Reg = MI.getOperand(0).getReg(); 242 const TargetRegisterClass *RC = inferRegClassFromUses(TRI, MRI, Reg, 243 MI.getOperand(0).getSubReg()); 244 if (TRI->getCommonSubClass(RC, &AMDGPU::VGPR_32RegClass)) { 245 MRI.constrainRegClass(Reg, &AMDGPU::VGPR_32RegClass); 246 } 247 248 if (!TRI->isSGPRClass(MRI.getRegClass(Reg))) 249 break; 250 251 // If a PHI node defines an SGPR and any of its operands are VGPRs, 252 // then we need to move it to the VALU. 
253 // 254 // Also, if a PHI node defines an SGPR and has all SGPR operands 255 // we must move it to the VALU, because the SGPR operands will 256 // all end up being assigned the same register, which means 257 // there is a potential for a conflict if different threads take 258 // different control flow paths. 259 // 260 // For Example: 261 // 262 // sgpr0 = def; 263 // ... 264 // sgpr1 = def; 265 // ... 266 // sgpr2 = PHI sgpr0, sgpr1 267 // use sgpr2; 268 // 269 // Will Become: 270 // 271 // sgpr2 = def; 272 // ... 273 // sgpr2 = def; 274 // ... 275 // use sgpr2 276 // 277 // FIXME: This is OK if the branching decision is made based on an 278 // SGPR value. 279 bool SGPRBranch = false; 280 281 // The one exception to this rule is when one of the operands 282 // is defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK 283 // instruction. In this case, there we know the program will 284 // never enter the second block (the loop) without entering 285 // the first block (where the condition is computed), so there 286 // is no chance for values to be over-written. 287 288 bool HasBreakDef = false; 289 for (unsigned i = 1; i < MI.getNumOperands(); i+=2) { 290 unsigned Reg = MI.getOperand(i).getReg(); 291 if (TRI->hasVGPRs(MRI.getRegClass(Reg))) { 292 TII->moveToVALU(MI); 293 break; 294 } 295 MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg); 296 assert(DefInstr); 297 switch(DefInstr->getOpcode()) { 298 299 case AMDGPU::SI_BREAK: 300 case AMDGPU::SI_IF_BREAK: 301 case AMDGPU::SI_ELSE_BREAK: 302 // If we see a PHI instruction that defines an SGPR, then that PHI 303 // instruction has already been considered and should have 304 // a *_BREAK as an operand. 
305 case AMDGPU::PHI: 306 HasBreakDef = true; 307 break; 308 } 309 } 310 311 if (!SGPRBranch && !HasBreakDef) 312 TII->moveToVALU(MI); 313 break; 314 } 315 case AMDGPU::REG_SEQUENCE: { 316 if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) || 317 !hasVGPROperands(MI, TRI)) 318 continue; 319 320 DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI); 321 322 TII->moveToVALU(MI); 323 break; 324 } 325 case AMDGPU::INSERT_SUBREG: { 326 const TargetRegisterClass *DstRC, *Src0RC, *Src1RC; 327 DstRC = MRI.getRegClass(MI.getOperand(0).getReg()); 328 Src0RC = MRI.getRegClass(MI.getOperand(1).getReg()); 329 Src1RC = MRI.getRegClass(MI.getOperand(2).getReg()); 330 if (TRI->isSGPRClass(DstRC) && 331 (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) { 332 DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI); 333 TII->moveToVALU(MI); 334 } 335 break; 336 } 337 } 338 } 339 } 340 341 return true; 342 } 343