1 //===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// This pass combines split register tuple initialization into a single pseudo:
11 ///
12 ///   undef %0.sub1:sreg_64 = S_MOV_B32 1
13 ///   %0.sub0:sreg_64 = S_MOV_B32 2
14 /// =>
15 ///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
16 ///
17 /// This is to allow rematerialization of a value instead of spilling. It is
18 /// supposed to be done after register coalescer to allow it to do its job and
19 /// before actual register allocation to allow rematerialization.
20 ///
/// Right now the tuple combining only handles 64-bit SGPRs with immediate
/// initializers, although the same should be possible with other register
/// classes and instructions if necessary.
24 ///
25 //===----------------------------------------------------------------------===//
26 
27 #include "AMDGPU.h"
28 #include "GCNSubtarget.h"
29 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
30 #include "llvm/CodeGen/LiveIntervals.h"
31 #include "llvm/CodeGen/MachineFunctionPass.h"
32 #include "llvm/InitializePasses.h"
33 
34 using namespace llvm;
35 
36 #define DEBUG_TYPE "amdgpu-pre-ra-optimizations"
37 
38 namespace {
39 
40 class GCNPreRAOptimizations : public MachineFunctionPass {
41 private:
42   const SIInstrInfo *TII;
43   const SIRegisterInfo *TRI;
44   MachineRegisterInfo *MRI;
45   LiveIntervals *LIS;
46 
47   bool processReg(Register Reg);
48 
49 public:
50   static char ID;
51 
52   GCNPreRAOptimizations() : MachineFunctionPass(ID) {
53     initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
54   }
55 
56   bool runOnMachineFunction(MachineFunction &MF) override;
57 
58   StringRef getPassName() const override {
59     return "AMDGPU Pre-RA optimizations";
60   }
61 
62   void getAnalysisUsage(AnalysisUsage &AU) const override {
63     AU.addRequired<LiveIntervals>();
64     AU.setPreservesAll();
65     MachineFunctionPass::getAnalysisUsage(AU);
66   }
67 };
68 
69 } // End anonymous namespace.
70 
71 INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
72                       "AMDGPU Pre-RA optimizations", false, false)
73 INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
74 INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations",
75                     false, false)
76 
77 char GCNPreRAOptimizations::ID = 0;
78 
79 char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;
80 
81 FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
82   return new GCNPreRAOptimizations();
83 }
84 
85 bool GCNPreRAOptimizations::processReg(Register Reg) {
86   MachineInstr *Def0 = nullptr;
87   MachineInstr *Def1 = nullptr;
88   uint64_t Init = 0;
89   bool Changed = false;
90   SmallSet<Register, 32> ModifiedRegs;
91   bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));
92 
93   for (MachineInstr &I : MRI->def_instructions(Reg)) {
94     switch (I.getOpcode()) {
95     default:
96       return false;
97     case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
98       break;
99     case AMDGPU::COPY: {
100       // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
101       // intermdiate temporary VGPR register. Try to find the defining
102       // accvgpr_write to avoid temporary registers.
103       if (!IsAGPRDst)
104         break;
105 
106       Register SrcReg = I.getOperand(1).getReg();
107 
108       if (!SrcReg.isVirtual())
109         break;
110 
111       // Check if source of copy is from another AGPR.
112       bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
113       if (!IsAGPRSrc)
114         break;
115 
116       // def_instructions() does not look at subregs so it may give us a
117       // different instruction that defines the same vreg but different subreg
118       // so we have to manually check subreg.
119       Register SrcSubReg = I.getOperand(1).getSubReg();
120       for (auto &Def : MRI->def_instructions(SrcReg)) {
121         if (SrcSubReg != Def.getOperand(0).getSubReg())
122           continue;
123 
124         if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
125           MachineOperand DefSrcMO = Def.getOperand(1);
126 
127           // Immediates are not an issue and can be propagated in
128           // postrapseudos pass. Only handle cases where defining
129           // accvgpr_write source is a vreg.
130           if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
131             // Propagate source reg of accvgpr write to this copy instruction
132             I.getOperand(1).setReg(DefSrcMO.getReg());
133             I.getOperand(1).setSubReg(DefSrcMO.getSubReg());
134 
135             // Reg uses were changed, collect unique set of registers to update
136             // live intervals at the end.
137             ModifiedRegs.insert(DefSrcMO.getReg());
138             ModifiedRegs.insert(SrcReg);
139 
140             Changed = true;
141           }
142 
143           // Found the defining accvgpr_write, stop looking any further.
144           break;
145         }
146       }
147       break;
148     }
149     case AMDGPU::S_MOV_B32:
150       if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
151           I.getNumOperands() != 2)
152         return false;
153 
154       switch (I.getOperand(0).getSubReg()) {
155       default:
156         return false;
157       case AMDGPU::sub0:
158         if (Def0)
159           return false;
160         Def0 = &I;
161         Init |= I.getOperand(1).getImm() & 0xffffffff;
162         break;
163       case AMDGPU::sub1:
164         if (Def1)
165           return false;
166         Def1 = &I;
167         Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
168         break;
169       }
170       break;
171     }
172   }
173 
174   // For AGPR reg, check if live intervals need to be updated.
175   if (IsAGPRDst) {
176     if (Changed) {
177       for (Register RegToUpdate : ModifiedRegs) {
178         LIS->removeInterval(RegToUpdate);
179         LIS->createAndComputeVirtRegInterval(RegToUpdate);
180       }
181     }
182 
183     return Changed;
184   }
185 
186   // For SGPR reg, check if we can combine instructions.
187   if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
188     return Changed;
189 
190   LLVM_DEBUG(dbgs() << "Combining:\n  " << *Def0 << "  " << *Def1
191                     << "    =>\n");
192 
193   if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
194                                 LIS->getInstructionIndex(*Def0)))
195     std::swap(Def0, Def1);
196 
197   LIS->RemoveMachineInstrFromMaps(*Def0);
198   LIS->RemoveMachineInstrFromMaps(*Def1);
199   auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
200                       TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
201                   .addImm(Init);
202 
203   Def0->eraseFromParent();
204   Def1->eraseFromParent();
205   LIS->InsertMachineInstrInMaps(*NewI);
206   LIS->removeInterval(Reg);
207   LIS->createAndComputeVirtRegInterval(Reg);
208 
209   LLVM_DEBUG(dbgs() << "  " << *NewI);
210 
211   return true;
212 }
213 
214 bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
215   if (skipFunction(MF.getFunction()))
216     return false;
217 
218   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
219   TII = ST.getInstrInfo();
220   MRI = &MF.getRegInfo();
221   LIS = &getAnalysis<LiveIntervals>();
222   TRI = ST.getRegisterInfo();
223 
224   bool Changed = false;
225 
226   for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
227     Register Reg = Register::index2VirtReg(I);
228     if (!LIS->hasInterval(Reg))
229       continue;
230     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
231     if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
232         (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
233       continue;
234 
235     Changed |= processReg(Reg);
236   }
237 
238   return Changed;
239 }
240