//===-- SIFixSGPRCopies.cpp - Remove potential VGPR => SGPR copies --------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Copies from VGPR to SGPR registers are illegal and the register coalescer
/// will sometimes generate these illegal copies in situations like this:
///
///  Register Class <vsrc> is the union of <vgpr> and <sgpr>
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///   %vreg1 <vsrc> = COPY %vreg0 <sgpr>
///    ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <vsrc> = PHI %vreg1 <vsrc>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <vsrc>
///
/// The coalescer will begin at BB0 and eliminate its copy, after which the
/// code will look like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///    ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <vsrc> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <vsrc>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now that the result of the PHI instruction is an SGPR, the register
/// allocator is forced to constrain the register class of %vreg3 to
/// <sgpr>, so we end up with final code like this:
///
/// BB0:
///   %vreg0 <sgpr> = SCALAR_INST
///    ...
///   BRANCH %cond BB1, BB2
/// BB1:
///   %vreg2 <vgpr> = VECTOR_INST
///   %vreg3 <sgpr> = COPY %vreg2 <vgpr>
/// BB2:
///   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg3 <sgpr>, <BB#1>
///   %vreg5 <vgpr> = VECTOR_INST %vreg4 <sgpr>
///
/// Now this code contains an illegal copy from a VGPR to an SGPR.
///
/// In order to avoid this problem, this pass searches for PHI instructions
/// that define a <vsrc> register and constrains their definition class to
/// <vgpr> if any user of the PHI's result is a vector instruction. If the
/// PHI's definition class is constrained to <vgpr>, the coalescer will be
/// unable to perform the COPY elimination from the example above, which is
/// what ultimately led to the creation of the illegal COPY.
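///
/// With a MIR reproducer, the pass can usually be exercised in isolation via
/// something like "llc -march=amdgcn -run-pass=si-fix-sgpr-copies"; the pass
/// name is the DEBUG_TYPE string registered below.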
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"

using namespace llvm;

#define DEBUG_TYPE "si-fix-sgpr-copies"

namespace {

class SIFixSGPRCopies : public MachineFunctionPass {
public:
  static char ID;

  SIFixSGPRCopies() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "SI Fix SGPR copies"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

} // End anonymous namespace

INITIALIZE_PASS(SIFixSGPRCopies, DEBUG_TYPE,
                "SI Fix SGPR copies", false, false)

char SIFixSGPRCopies::ID = 0;

char &llvm::SIFixSGPRCopiesID = SIFixSGPRCopies::ID;

FunctionPass *llvm::createSIFixSGPRCopiesPass() {
  return new SIFixSGPRCopies();
}

static bool hasVGPROperands(const MachineInstr &MI, const SIRegisterInfo *TRI) {
  const MachineRegisterInfo &MRI = MI.getParent()->getParent()->getRegInfo();
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
    if (!MI.getOperand(i).isReg() ||
        !TargetRegisterInfo::isVirtualRegister(MI.getOperand(i).getReg()))
      continue;

    if (TRI->hasVGPRs(MRI.getRegClass(MI.getOperand(i).getReg())))
      return true;
  }
  return false;
}

static std::pair<const TargetRegisterClass *, const TargetRegisterClass *>
getCopyRegClasses(const MachineInstr &Copy,
                  const SIRegisterInfo &TRI,
                  const MachineRegisterInfo &MRI) {
  unsigned DstReg = Copy.getOperand(0).getReg();
  unsigned SrcReg = Copy.getOperand(1).getReg();

  const TargetRegisterClass *SrcRC =
    TargetRegisterInfo::isVirtualRegister(SrcReg) ?
    MRI.getRegClass(SrcReg) :
    TRI.getPhysRegClass(SrcReg);

  // We don't really care about the subregister here.
  // SrcRC = TRI.getSubRegClass(SrcRC, Copy.getOperand(1).getSubReg());

  const TargetRegisterClass *DstRC =
    TargetRegisterInfo::isVirtualRegister(DstReg) ?
    MRI.getRegClass(DstReg) :
    TRI.getPhysRegClass(DstReg);

  return std::make_pair(SrcRC, DstRC);
}

static bool isVGPRToSGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(DstRC) && TRI.hasVGPRs(SrcRC);
}

static bool isSGPRToVGPRCopy(const TargetRegisterClass *SrcRC,
                             const TargetRegisterClass *DstRC,
                             const SIRegisterInfo &TRI) {
  return TRI.isSGPRClass(SrcRC) && TRI.hasVGPRs(DstRC);
}

// Distribute an SGPR->VGPR copy of a REG_SEQUENCE into a VGPR REG_SEQUENCE.
//
//  SGPRx = ...
//  SGPRy = REG_SEQUENCE SGPRx, sub0 ...
//  VGPRz = COPY SGPRy
//
// ==>
//
//  VGPRx = COPY SGPRx
//  VGPRz = REG_SEQUENCE VGPRx, sub0
//
// This exposes immediate folding opportunities when materializing 64-bit
// immediates.
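//
// For example (an illustrative sketch; the exact opcodes depend on what the
// selector produced), a 64-bit immediate materialized as
//
//  SGPRlo = S_MOV_B32 0x12345678
//  SGPRhi = S_MOV_B32 0
//  SGPRy  = REG_SEQUENCE SGPRlo, sub0, SGPRhi, sub1
//  VGPRz  = COPY SGPRy
//
// ends up copying each 32-bit half separately, so the COPY of an S_MOV_B32
// result can later be folded into a V_MOV_B32 of the same immediate.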
static bool foldVGPRCopyIntoRegSequence(MachineInstr &MI,
                                        const SIRegisterInfo *TRI,
                                        const SIInstrInfo *TII,
                                        MachineRegisterInfo &MRI) {
  assert(MI.isRegSequence());

  unsigned DstReg = MI.getOperand(0).getReg();
  if (!TRI->isSGPRClass(MRI.getRegClass(DstReg)))
    return false;

  if (!MRI.hasOneUse(DstReg))
    return false;

  MachineInstr &CopyUse = *MRI.use_instr_begin(DstReg);
  if (!CopyUse.isCopy())
    return false;

  const TargetRegisterClass *SrcRC, *DstRC;
  std::tie(SrcRC, DstRC) = getCopyRegClasses(CopyUse, *TRI, MRI);

  if (!isSGPRToVGPRCopy(SrcRC, DstRC, *TRI))
    return false;

  // TODO: Could have multiple extracts?
  unsigned SubReg = CopyUse.getOperand(1).getSubReg();
  if (SubReg != AMDGPU::NoSubRegister)
    return false;

  MRI.setRegClass(DstReg, DstRC);

  // SGPRx = ...
  // SGPRy = REG_SEQUENCE SGPRx, sub0 ...
  // VGPRz = COPY SGPRy
  //
  // =>
  //
  // VGPRx = COPY SGPRx
  // VGPRz = REG_SEQUENCE VGPRx, sub0

  MI.getOperand(0).setReg(CopyUse.getOperand(0).getReg());

  for (unsigned I = 1, N = MI.getNumOperands(); I != N; I += 2) {
    unsigned SrcReg = MI.getOperand(I).getReg();
    unsigned SrcSubReg = MI.getOperand(I).getSubReg();

    const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
    assert(TRI->isSGPRClass(SrcRC) &&
           "Expected SGPR REG_SEQUENCE to only have SGPR inputs");

    SrcRC = TRI->getSubRegClass(SrcRC, SrcSubReg);
    const TargetRegisterClass *NewSrcRC = TRI->getEquivalentVGPRClass(SrcRC);

    unsigned TmpReg = MRI.createVirtualRegister(NewSrcRC);

    BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(), TII->get(AMDGPU::COPY),
            TmpReg)
      .addOperand(MI.getOperand(I));

    MI.getOperand(I).setReg(TmpReg);
  }

  CopyUse.eraseFromParent();
  return true;
}

bool SIFixSGPRCopies::runOnMachineFunction(MachineFunction &MF) {
  const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
  MachineRegisterInfo &MRI = MF.getRegInfo();
  const SIRegisterInfo *TRI = ST.getRegisterInfo();
  const SIInstrInfo *TII = ST.getInstrInfo();

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI) {
    MachineBasicBlock &MBB = *BI;
    for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      MachineInstr &MI = *I;

      switch (MI.getOpcode()) {
      default:
        continue;
      case AMDGPU::COPY: {
        // If the destination register is a physical register there isn't
        // really much we can do to fix this.
        if (!TargetRegisterInfo::isVirtualRegister(MI.getOperand(0).getReg()))
          continue;

        const TargetRegisterClass *SrcRC, *DstRC;
        std::tie(SrcRC, DstRC) = getCopyRegClasses(MI, *TRI, MRI);
        if (isVGPRToSGPRCopy(SrcRC, DstRC, *TRI)) {
          DEBUG(dbgs() << "Fixing VGPR -> SGPR copy: " << MI);
          TII->moveToVALU(MI);
        }

        break;
      }
      case AMDGPU::PHI: {
        DEBUG(dbgs() << "Fixing PHI: " << MI);
        unsigned Reg = MI.getOperand(0).getReg();
        if (!TRI->isSGPRClass(MRI.getRegClass(Reg)))
          break;

        // If a PHI node defines an SGPR and any of its operands are VGPRs,
        // then we need to move it to the VALU.
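        //
        // For instance (illustrative, mirroring the example in the file
        // header):
        //
        //   %vreg2 <vgpr> = VECTOR_INST
        //   %vreg4 <sgpr> = PHI %vreg0 <sgpr>, <BB#0>, %vreg2 <vgpr>, <BB#1>
        //
        // would otherwise imply an illegal VGPR -> SGPR copy of %vreg2 on
        // the edge from BB1.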
        //
        // Also, if a PHI node defines an SGPR and all of its operands are
        // SGPRs, we must still move it to the VALU, because the SGPR
        // operands will all end up being assigned the same register, which
        // means there is a potential for a conflict if different threads
        // take different control flow paths.
        //
        // For example:
        //
        // sgpr0 = def;
        // ...
        // sgpr1 = def;
        // ...
        // sgpr2 = PHI sgpr0, sgpr1
        // use sgpr2;
        //
        // will become:
        //
        // sgpr2 = def;
        // ...
        // sgpr2 = def;
        // ...
        // use sgpr2
        //
        // FIXME: This is OK if the branching decision is made based on an
        // SGPR value.
        bool SGPRBranch = false;

        // The one exception to this rule is when one of the operands is
        // defined by a SI_BREAK, SI_IF_BREAK, or SI_ELSE_BREAK instruction.
        // In this case, we know the program will never enter the second
        // block (the loop) without entering the first block (where the
        // condition is computed), so there is no chance for values to be
        // overwritten.
        bool HasBreakDef = false;
        bool HasVGPROperand = false;
        for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
          unsigned Reg = MI.getOperand(i).getReg();
          if (TRI->hasVGPRs(MRI.getRegClass(Reg))) {
            HasVGPROperand = true;
            break;
          }
          MachineInstr *DefInstr = MRI.getUniqueVRegDef(Reg);
          assert(DefInstr);
          switch (DefInstr->getOpcode()) {
          case AMDGPU::SI_BREAK:
          case AMDGPU::SI_IF_BREAK:
          case AMDGPU::SI_ELSE_BREAK:
          // If we see a PHI instruction that defines an SGPR, then that PHI
          // instruction has already been considered and should have a
          // *_BREAK as an operand.
          case AMDGPU::PHI:
            HasBreakDef = true;
            break;
          }
        }

        // Call moveToVALU at most once: either because a VGPR operand was
        // found, or because an all-SGPR PHI is not guarded by a *_BREAK
        // definition.
        if (HasVGPROperand || (!SGPRBranch && !HasBreakDef))
          TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::REG_SEQUENCE: {
        if (TRI->hasVGPRs(TII->getOpRegClass(MI, 0)) ||
            !hasVGPROperands(MI, TRI)) {
          foldVGPRCopyIntoRegSequence(MI, TRI, TII, MRI);
          continue;
        }

        DEBUG(dbgs() << "Fixing REG_SEQUENCE: " << MI);

        TII->moveToVALU(MI);
        break;
      }
      case AMDGPU::INSERT_SUBREG: {
        const TargetRegisterClass *DstRC, *Src0RC, *Src1RC;
        DstRC = MRI.getRegClass(MI.getOperand(0).getReg());
        Src0RC = MRI.getRegClass(MI.getOperand(1).getReg());
        Src1RC = MRI.getRegClass(MI.getOperand(2).getReg());
        if (TRI->isSGPRClass(DstRC) &&
            (TRI->hasVGPRs(Src0RC) || TRI->hasVGPRs(Src1RC))) {
          DEBUG(dbgs() << " Fixing INSERT_SUBREG: " << MI);
          TII->moveToVALU(MI);
        }
        break;
      }
      }
    }
  }

  return true;
}