1381ded34SStanislav Mekhanoshin //===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
2381ded34SStanislav Mekhanoshin //
3381ded34SStanislav Mekhanoshin // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4381ded34SStanislav Mekhanoshin // See https://llvm.org/LICENSE.txt for license information.
5381ded34SStanislav Mekhanoshin // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6381ded34SStanislav Mekhanoshin //
7381ded34SStanislav Mekhanoshin //===----------------------------------------------------------------------===//
8381ded34SStanislav Mekhanoshin //
9381ded34SStanislav Mekhanoshin /// \file
10*d1f45ed5SNeubauer, Sebastian /// This pass combines split register tuple initialization into a single pseudo:
11381ded34SStanislav Mekhanoshin ///
12381ded34SStanislav Mekhanoshin ///   undef %0.sub1:sreg_64 = S_MOV_B32 1
13381ded34SStanislav Mekhanoshin ///   %0.sub0:sreg_64 = S_MOV_B32 2
14381ded34SStanislav Mekhanoshin /// =>
15381ded34SStanislav Mekhanoshin ///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
16381ded34SStanislav Mekhanoshin ///
17381ded34SStanislav Mekhanoshin /// This is to allow rematerialization of a value instead of spilling. It is
18381ded34SStanislav Mekhanoshin /// supposed to be done after register coalescer to allow it to do its job and
19381ded34SStanislav Mekhanoshin /// before actual register allocation to allow rematerialization.
20381ded34SStanislav Mekhanoshin ///
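/// Right now the tuple combining only handles 64-bit SGPRs with immediate
/// initializers, although the same should be possible with other register
/// classes and instructions if necessary.
///
/// In addition, on subtargets without GFX90A instructions, a COPY between
/// virtual AGPRs is rewritten to read directly from the virtual register that
/// feeds the defining V_ACCVGPR_WRITE_B32_e64, because some subtargets cannot
/// do an AGPR to AGPR copy without an intermediate temporary VGPR.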
24381ded34SStanislav Mekhanoshin ///
25381ded34SStanislav Mekhanoshin //===----------------------------------------------------------------------===//
26381ded34SStanislav Mekhanoshin 
27381ded34SStanislav Mekhanoshin #include "AMDGPU.h"
28381ded34SStanislav Mekhanoshin #include "GCNSubtarget.h"
29381ded34SStanislav Mekhanoshin #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
30381ded34SStanislav Mekhanoshin #include "llvm/CodeGen/LiveIntervals.h"
31381ded34SStanislav Mekhanoshin #include "llvm/CodeGen/MachineFunctionPass.h"
32381ded34SStanislav Mekhanoshin #include "llvm/InitializePasses.h"
33381ded34SStanislav Mekhanoshin 
34381ded34SStanislav Mekhanoshin using namespace llvm;
35381ded34SStanislav Mekhanoshin 
36381ded34SStanislav Mekhanoshin #define DEBUG_TYPE "amdgpu-pre-ra-optimizations"
37381ded34SStanislav Mekhanoshin 
38381ded34SStanislav Mekhanoshin namespace {
39381ded34SStanislav Mekhanoshin 
40381ded34SStanislav Mekhanoshin class GCNPreRAOptimizations : public MachineFunctionPass {
41381ded34SStanislav Mekhanoshin private:
42381ded34SStanislav Mekhanoshin   const SIInstrInfo *TII;
431443ba61SVang Thao   const SIRegisterInfo *TRI;
44381ded34SStanislav Mekhanoshin   MachineRegisterInfo *MRI;
45381ded34SStanislav Mekhanoshin   LiveIntervals *LIS;
46381ded34SStanislav Mekhanoshin 
47381ded34SStanislav Mekhanoshin   bool processReg(Register Reg);
48381ded34SStanislav Mekhanoshin 
49381ded34SStanislav Mekhanoshin public:
50381ded34SStanislav Mekhanoshin   static char ID;
51381ded34SStanislav Mekhanoshin 
GCNPreRAOptimizations()52381ded34SStanislav Mekhanoshin   GCNPreRAOptimizations() : MachineFunctionPass(ID) {
53381ded34SStanislav Mekhanoshin     initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
54381ded34SStanislav Mekhanoshin   }
55381ded34SStanislav Mekhanoshin 
56381ded34SStanislav Mekhanoshin   bool runOnMachineFunction(MachineFunction &MF) override;
57381ded34SStanislav Mekhanoshin 
getPassName() const58381ded34SStanislav Mekhanoshin   StringRef getPassName() const override {
59381ded34SStanislav Mekhanoshin     return "AMDGPU Pre-RA optimizations";
60381ded34SStanislav Mekhanoshin   }
61381ded34SStanislav Mekhanoshin 
getAnalysisUsage(AnalysisUsage & AU) const62381ded34SStanislav Mekhanoshin   void getAnalysisUsage(AnalysisUsage &AU) const override {
63381ded34SStanislav Mekhanoshin     AU.addRequired<LiveIntervals>();
64381ded34SStanislav Mekhanoshin     AU.setPreservesAll();
65381ded34SStanislav Mekhanoshin     MachineFunctionPass::getAnalysisUsage(AU);
66381ded34SStanislav Mekhanoshin   }
67381ded34SStanislav Mekhanoshin };
68381ded34SStanislav Mekhanoshin 
69381ded34SStanislav Mekhanoshin } // End anonymous namespace.
70381ded34SStanislav Mekhanoshin 
71381ded34SStanislav Mekhanoshin INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
72381ded34SStanislav Mekhanoshin                       "AMDGPU Pre-RA optimizations", false, false)
73381ded34SStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
74381ded34SStanislav Mekhanoshin INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations",
75381ded34SStanislav Mekhanoshin                     false, false)
76381ded34SStanislav Mekhanoshin 
77381ded34SStanislav Mekhanoshin char GCNPreRAOptimizations::ID = 0;
78381ded34SStanislav Mekhanoshin 
79381ded34SStanislav Mekhanoshin char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;
80381ded34SStanislav Mekhanoshin 
createGCNPreRAOptimizationsPass()81381ded34SStanislav Mekhanoshin FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
82381ded34SStanislav Mekhanoshin   return new GCNPreRAOptimizations();
83381ded34SStanislav Mekhanoshin }
84381ded34SStanislav Mekhanoshin 
processReg(Register Reg)85381ded34SStanislav Mekhanoshin bool GCNPreRAOptimizations::processReg(Register Reg) {
86381ded34SStanislav Mekhanoshin   MachineInstr *Def0 = nullptr;
87381ded34SStanislav Mekhanoshin   MachineInstr *Def1 = nullptr;
88381ded34SStanislav Mekhanoshin   uint64_t Init = 0;
891443ba61SVang Thao   bool Changed = false;
901443ba61SVang Thao   SmallSet<Register, 32> ModifiedRegs;
911443ba61SVang Thao   bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));
92381ded34SStanislav Mekhanoshin 
93381ded34SStanislav Mekhanoshin   for (MachineInstr &I : MRI->def_instructions(Reg)) {
941443ba61SVang Thao     switch (I.getOpcode()) {
951443ba61SVang Thao     default:
961443ba61SVang Thao       return false;
971443ba61SVang Thao     case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
981443ba61SVang Thao       break;
991443ba61SVang Thao     case AMDGPU::COPY: {
1001443ba61SVang Thao       // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
1011443ba61SVang Thao       // intermdiate temporary VGPR register. Try to find the defining
1021443ba61SVang Thao       // accvgpr_write to avoid temporary registers.
103fbe61fb0SAbinav Puthan Purayil 
1041443ba61SVang Thao       if (!IsAGPRDst)
105fbe61fb0SAbinav Puthan Purayil         return false;
1061443ba61SVang Thao 
1071443ba61SVang Thao       Register SrcReg = I.getOperand(1).getReg();
1081443ba61SVang Thao 
1091443ba61SVang Thao       if (!SrcReg.isVirtual())
1101443ba61SVang Thao         break;
1111443ba61SVang Thao 
1121443ba61SVang Thao       // Check if source of copy is from another AGPR.
1131443ba61SVang Thao       bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
1141443ba61SVang Thao       if (!IsAGPRSrc)
1151443ba61SVang Thao         break;
1161443ba61SVang Thao 
1171443ba61SVang Thao       // def_instructions() does not look at subregs so it may give us a
1181443ba61SVang Thao       // different instruction that defines the same vreg but different subreg
1191443ba61SVang Thao       // so we have to manually check subreg.
1201443ba61SVang Thao       Register SrcSubReg = I.getOperand(1).getSubReg();
1211443ba61SVang Thao       for (auto &Def : MRI->def_instructions(SrcReg)) {
1221443ba61SVang Thao         if (SrcSubReg != Def.getOperand(0).getSubReg())
1231443ba61SVang Thao           continue;
1241443ba61SVang Thao 
1251443ba61SVang Thao         if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
1261443ba61SVang Thao           MachineOperand DefSrcMO = Def.getOperand(1);
1271443ba61SVang Thao 
1281443ba61SVang Thao           // Immediates are not an issue and can be propagated in
1291443ba61SVang Thao           // postrapseudos pass. Only handle cases where defining
1301443ba61SVang Thao           // accvgpr_write source is a vreg.
1311443ba61SVang Thao           if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
1321443ba61SVang Thao             // Propagate source reg of accvgpr write to this copy instruction
1331443ba61SVang Thao             I.getOperand(1).setReg(DefSrcMO.getReg());
1341443ba61SVang Thao             I.getOperand(1).setSubReg(DefSrcMO.getSubReg());
1351443ba61SVang Thao 
1361443ba61SVang Thao             // Reg uses were changed, collect unique set of registers to update
1371443ba61SVang Thao             // live intervals at the end.
1381443ba61SVang Thao             ModifiedRegs.insert(DefSrcMO.getReg());
1391443ba61SVang Thao             ModifiedRegs.insert(SrcReg);
1401443ba61SVang Thao 
1411443ba61SVang Thao             Changed = true;
1421443ba61SVang Thao           }
1431443ba61SVang Thao 
1441443ba61SVang Thao           // Found the defining accvgpr_write, stop looking any further.
1451443ba61SVang Thao           break;
1461443ba61SVang Thao         }
1471443ba61SVang Thao       }
1481443ba61SVang Thao       break;
1491443ba61SVang Thao     }
1501443ba61SVang Thao     case AMDGPU::S_MOV_B32:
1511443ba61SVang Thao       if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
1521443ba61SVang Thao           I.getNumOperands() != 2)
153381ded34SStanislav Mekhanoshin         return false;
154381ded34SStanislav Mekhanoshin 
155381ded34SStanislav Mekhanoshin       switch (I.getOperand(0).getSubReg()) {
156381ded34SStanislav Mekhanoshin       default:
157381ded34SStanislav Mekhanoshin         return false;
158381ded34SStanislav Mekhanoshin       case AMDGPU::sub0:
159381ded34SStanislav Mekhanoshin         if (Def0)
160381ded34SStanislav Mekhanoshin           return false;
161381ded34SStanislav Mekhanoshin         Def0 = &I;
162381ded34SStanislav Mekhanoshin         Init |= I.getOperand(1).getImm() & 0xffffffff;
163381ded34SStanislav Mekhanoshin         break;
164381ded34SStanislav Mekhanoshin       case AMDGPU::sub1:
165381ded34SStanislav Mekhanoshin         if (Def1)
166381ded34SStanislav Mekhanoshin           return false;
167381ded34SStanislav Mekhanoshin         Def1 = &I;
168381ded34SStanislav Mekhanoshin         Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
169381ded34SStanislav Mekhanoshin         break;
170381ded34SStanislav Mekhanoshin       }
1711443ba61SVang Thao       break;
1721443ba61SVang Thao     }
173381ded34SStanislav Mekhanoshin   }
174381ded34SStanislav Mekhanoshin 
1751443ba61SVang Thao   // For AGPR reg, check if live intervals need to be updated.
1761443ba61SVang Thao   if (IsAGPRDst) {
1771443ba61SVang Thao     if (Changed) {
1781443ba61SVang Thao       for (Register RegToUpdate : ModifiedRegs) {
1791443ba61SVang Thao         LIS->removeInterval(RegToUpdate);
1801443ba61SVang Thao         LIS->createAndComputeVirtRegInterval(RegToUpdate);
1811443ba61SVang Thao       }
1821443ba61SVang Thao     }
1831443ba61SVang Thao 
1841443ba61SVang Thao     return Changed;
1851443ba61SVang Thao   }
1861443ba61SVang Thao 
1871443ba61SVang Thao   // For SGPR reg, check if we can combine instructions.
188381ded34SStanislav Mekhanoshin   if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
1891443ba61SVang Thao     return Changed;
190381ded34SStanislav Mekhanoshin 
191381ded34SStanislav Mekhanoshin   LLVM_DEBUG(dbgs() << "Combining:\n  " << *Def0 << "  " << *Def1
192381ded34SStanislav Mekhanoshin                     << "    =>\n");
193381ded34SStanislav Mekhanoshin 
194381ded34SStanislav Mekhanoshin   if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
195381ded34SStanislav Mekhanoshin                                 LIS->getInstructionIndex(*Def0)))
196381ded34SStanislav Mekhanoshin     std::swap(Def0, Def1);
197381ded34SStanislav Mekhanoshin 
198381ded34SStanislav Mekhanoshin   LIS->RemoveMachineInstrFromMaps(*Def0);
199381ded34SStanislav Mekhanoshin   LIS->RemoveMachineInstrFromMaps(*Def1);
200381ded34SStanislav Mekhanoshin   auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
201381ded34SStanislav Mekhanoshin                       TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
202381ded34SStanislav Mekhanoshin                   .addImm(Init);
203381ded34SStanislav Mekhanoshin 
204381ded34SStanislav Mekhanoshin   Def0->eraseFromParent();
205381ded34SStanislav Mekhanoshin   Def1->eraseFromParent();
206381ded34SStanislav Mekhanoshin   LIS->InsertMachineInstrInMaps(*NewI);
207381ded34SStanislav Mekhanoshin   LIS->removeInterval(Reg);
208381ded34SStanislav Mekhanoshin   LIS->createAndComputeVirtRegInterval(Reg);
209381ded34SStanislav Mekhanoshin 
210381ded34SStanislav Mekhanoshin   LLVM_DEBUG(dbgs() << "  " << *NewI);
211381ded34SStanislav Mekhanoshin 
212381ded34SStanislav Mekhanoshin   return true;
213381ded34SStanislav Mekhanoshin }
214381ded34SStanislav Mekhanoshin 
runOnMachineFunction(MachineFunction & MF)215381ded34SStanislav Mekhanoshin bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
216381ded34SStanislav Mekhanoshin   if (skipFunction(MF.getFunction()))
217381ded34SStanislav Mekhanoshin     return false;
218381ded34SStanislav Mekhanoshin 
219381ded34SStanislav Mekhanoshin   const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
220381ded34SStanislav Mekhanoshin   TII = ST.getInstrInfo();
221381ded34SStanislav Mekhanoshin   MRI = &MF.getRegInfo();
222381ded34SStanislav Mekhanoshin   LIS = &getAnalysis<LiveIntervals>();
2231443ba61SVang Thao   TRI = ST.getRegisterInfo();
224381ded34SStanislav Mekhanoshin 
225381ded34SStanislav Mekhanoshin   bool Changed = false;
226381ded34SStanislav Mekhanoshin 
227381ded34SStanislav Mekhanoshin   for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
228381ded34SStanislav Mekhanoshin     Register Reg = Register::index2VirtReg(I);
229381ded34SStanislav Mekhanoshin     if (!LIS->hasInterval(Reg))
230381ded34SStanislav Mekhanoshin       continue;
231381ded34SStanislav Mekhanoshin     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
2321443ba61SVang Thao     if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
2331443ba61SVang Thao         (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
234381ded34SStanislav Mekhanoshin       continue;
2351443ba61SVang Thao 
236381ded34SStanislav Mekhanoshin     Changed |= processReg(Reg);
237381ded34SStanislav Mekhanoshin   }
238381ded34SStanislav Mekhanoshin 
239381ded34SStanislav Mekhanoshin   return Changed;
240381ded34SStanislav Mekhanoshin }
241