//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single pseudo:
///
///   undef %0.sub1:sreg_64 = S_MOV_B32 1
///   %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
///
/// This is to allow rematerialization of a value instead of spilling. It is
/// supposed to be done after register coalescer to allow it to do its job and
/// before actual register allocation to allow rematerialization.
///
/// Right now the pass only handles 64 bit SGPRs with immediate initializers,
/// although the same shall be possible with other register classes and
/// instructions if necessary.
24 /// 25 //===----------------------------------------------------------------------===// 26 27 #include "AMDGPU.h" 28 #include "GCNSubtarget.h" 29 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 30 #include "llvm/CodeGen/LiveIntervals.h" 31 #include "llvm/CodeGen/MachineFunctionPass.h" 32 #include "llvm/InitializePasses.h" 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "amdgpu-pre-ra-optimizations" 37 38 namespace { 39 40 class GCNPreRAOptimizations : public MachineFunctionPass { 41 private: 42 const SIInstrInfo *TII; 43 const SIRegisterInfo *TRI; 44 MachineRegisterInfo *MRI; 45 LiveIntervals *LIS; 46 47 bool processReg(Register Reg); 48 49 public: 50 static char ID; 51 52 GCNPreRAOptimizations() : MachineFunctionPass(ID) { 53 initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry()); 54 } 55 56 bool runOnMachineFunction(MachineFunction &MF) override; 57 58 StringRef getPassName() const override { 59 return "AMDGPU Pre-RA optimizations"; 60 } 61 62 void getAnalysisUsage(AnalysisUsage &AU) const override { 63 AU.addRequired<LiveIntervals>(); 64 AU.setPreservesAll(); 65 MachineFunctionPass::getAnalysisUsage(AU); 66 } 67 }; 68 69 } // End anonymous namespace. 
INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
                      "AMDGPU Pre-RA optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations",
                    false, false)

char GCNPreRAOptimizations::ID = 0;

char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;

FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
  return new GCNPreRAOptimizations();
}

bool GCNPreRAOptimizations::processReg(Register Reg) {
  // Candidate sub0/sub1 S_MOV_B32 immediate defs for the SGPR64 combine.
  MachineInstr *Def0 = nullptr;
  MachineInstr *Def1 = nullptr;
  // Combined 64-bit immediate assembled from the two 32-bit halves.
  uint64_t Init = 0;
  bool Changed = false;
  // Registers whose uses were rewritten; their live intervals are
  // recomputed once at the end rather than per rewrite.
  SmallSet<Register, 32> ModifiedRegs;
  bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));

  for (MachineInstr &I : MRI->def_instructions(Reg)) {
    switch (I.getOpcode()) {
    default:
      // Any other defining opcode disqualifies the register entirely.
      return false;
    case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
      break;
    case AMDGPU::COPY: {
      // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
      // intermediate temporary VGPR register. Try to find the defining
      // accvgpr_write to avoid temporary registers.
      if (!IsAGPRDst)
        break;

      Register SrcReg = I.getOperand(1).getReg();

      if (!SrcReg.isVirtual())
        break;

      // Check if source of copy is from another AGPR.
      bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
      if (!IsAGPRSrc)
        break;

      // def_instructions() does not look at subregs so it may give us a
      // different instruction that defines the same vreg but different subreg
      // so we have to manually check subreg.
      Register SrcSubReg = I.getOperand(1).getSubReg();
      for (auto &Def : MRI->def_instructions(SrcReg)) {
        if (SrcSubReg != Def.getOperand(0).getSubReg())
          continue;

        if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
          MachineOperand DefSrcMO = Def.getOperand(1);

          // Immediates are not an issue and can be propagated in
          // postrapseudos pass. Only handle cases where defining
          // accvgpr_write source is a vreg.
          if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
            // Propagate source reg of accvgpr write to this copy instruction
            I.getOperand(1).setReg(DefSrcMO.getReg());
            I.getOperand(1).setSubReg(DefSrcMO.getSubReg());

            // Reg uses were changed, collect unique set of registers to update
            // live intervals at the end.
            ModifiedRegs.insert(DefSrcMO.getReg());
            ModifiedRegs.insert(SrcReg);

            Changed = true;
          }

          // Found the defining accvgpr_write, stop looking any further.
          break;
        }
      }
      break;
    }
    case AMDGPU::S_MOV_B32:
      // Only plain full-register-of-Reg immediate moves qualify (exactly the
      // def and the immediate operand, nothing implicit).
      if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
          I.getNumOperands() != 2)
        return false;

      switch (I.getOperand(0).getSubReg()) {
      default:
        return false;
      case AMDGPU::sub0:
        // Reject a second def of the same half.
        if (Def0)
          return false;
        Def0 = &I;
        // Low 32 bits of the combined immediate.
        Init |= I.getOperand(1).getImm() & 0xffffffff;
        break;
      case AMDGPU::sub1:
        if (Def1)
          return false;
        Def1 = &I;
        // High 32 bits of the combined immediate.
        Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
        break;
      }
      break;
    }
  }

  // For AGPR reg, check if live intervals need to be updated.
  if (IsAGPRDst) {
    if (Changed) {
      for (Register RegToUpdate : ModifiedRegs) {
        // Recompute from scratch: uses of these registers were rewritten.
        LIS->removeInterval(RegToUpdate);
        LIS->createAndComputeVirtRegInterval(RegToUpdate);
      }
    }

    return Changed;
  }

  // For SGPR reg, check if we can combine instructions. Both halves must be
  // defined, and in the same basic block.
  if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
    return Changed;

  LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1
                    << " =>\n");

  // Make Def0 the earlier of the two so the replacement is inserted at the
  // first def point.
  if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
                                LIS->getInstructionIndex(*Def0)))
    std::swap(Def0, Def1);

  // Detach both moves from the slot index maps before erasing them.
  LIS->RemoveMachineInstrFromMaps(*Def0);
  LIS->RemoveMachineInstrFromMaps(*Def1);
  auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
                      TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
                  .addImm(Init);

  Def0->eraseFromParent();
  Def1->eraseFromParent();
  // Index the new instruction and rebuild Reg's live interval around it.
  LIS->InsertMachineInstrInMaps(*NewI);
  LIS->removeInterval(Reg);
  LIS->createAndComputeVirtRegInterval(Reg);

  LLVM_DEBUG(dbgs() << " " << *NewI);

  return true;
}

bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  MRI = &MF.getRegInfo();
  LIS = &getAnalysis<LiveIntervals>();
  TRI = ST.getRegisterInfo();

  bool Changed = false;

  // Walk every virtual register and try to optimize the interesting ones:
  // 64-bit SGPR classes, or (on subtargets without GFX90A insts, i.e. without
  // direct AGPR-to-AGPR copies) AGPR classes.
  for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
    Register Reg = Register::index2VirtReg(I);
    if (!LIS->hasInterval(Reg))
      continue;
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
        (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
      continue;

    Changed |= processReg(Reg);
  }

  return Changed;
}