//===-- GCNPreRAOptimizations.cpp -----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass combines split register tuple initialization into a single pseudo:
///
///   undef %0.sub1:sreg_64 = S_MOV_B32 1
///   %0.sub0:sreg_64 = S_MOV_B32 2
/// =>
///   %0:sreg_64 = S_MOV_B64_IMM_PSEUDO 0x200000001
///
/// This is to allow rematerialization of a value instead of spilling. It is
/// supposed to be done after register coalescer to allow it to do its job and
/// before actual register allocation to allow rematerialization.
///
/// Right now this combining only handles 64 bit SGPRs with immediate
/// initializers, although the same should be possible with other register
/// classes and instructions if necessary.
///
/// In addition, on subtargets without GFX90A instructions, a COPY into an AGPR
/// whose source is an AGPR defined by a V_ACCVGPR_WRITE_B32_e64 of a virtual
/// register is rewritten to copy from that virtual register directly, to avoid
/// a temporary VGPR for the AGPR to AGPR copy.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/InitializePasses.h"

using namespace llvm;

// Identifier of this pass; it is passed to the pass registration macros in
// this file and is the debug type under which LLVM_DEBUG output is emitted.
#define DEBUG_TYPE "amdgpu-pre-ra-optimizations"

38381ded34SStanislav Mekhanoshin namespace {
39381ded34SStanislav Mekhanoshin
40381ded34SStanislav Mekhanoshin class GCNPreRAOptimizations : public MachineFunctionPass {
41381ded34SStanislav Mekhanoshin private:
42381ded34SStanislav Mekhanoshin const SIInstrInfo *TII;
431443ba61SVang Thao const SIRegisterInfo *TRI;
44381ded34SStanislav Mekhanoshin MachineRegisterInfo *MRI;
45381ded34SStanislav Mekhanoshin LiveIntervals *LIS;
46381ded34SStanislav Mekhanoshin
47381ded34SStanislav Mekhanoshin bool processReg(Register Reg);
48381ded34SStanislav Mekhanoshin
49381ded34SStanislav Mekhanoshin public:
50381ded34SStanislav Mekhanoshin static char ID;
51381ded34SStanislav Mekhanoshin
GCNPreRAOptimizations()52381ded34SStanislav Mekhanoshin GCNPreRAOptimizations() : MachineFunctionPass(ID) {
53381ded34SStanislav Mekhanoshin initializeGCNPreRAOptimizationsPass(*PassRegistry::getPassRegistry());
54381ded34SStanislav Mekhanoshin }
55381ded34SStanislav Mekhanoshin
56381ded34SStanislav Mekhanoshin bool runOnMachineFunction(MachineFunction &MF) override;
57381ded34SStanislav Mekhanoshin
getPassName() const58381ded34SStanislav Mekhanoshin StringRef getPassName() const override {
59381ded34SStanislav Mekhanoshin return "AMDGPU Pre-RA optimizations";
60381ded34SStanislav Mekhanoshin }
61381ded34SStanislav Mekhanoshin
getAnalysisUsage(AnalysisUsage & AU) const62381ded34SStanislav Mekhanoshin void getAnalysisUsage(AnalysisUsage &AU) const override {
63381ded34SStanislav Mekhanoshin AU.addRequired<LiveIntervals>();
64381ded34SStanislav Mekhanoshin AU.setPreservesAll();
65381ded34SStanislav Mekhanoshin MachineFunctionPass::getAnalysisUsage(AU);
66381ded34SStanislav Mekhanoshin }
67381ded34SStanislav Mekhanoshin };
68381ded34SStanislav Mekhanoshin
69381ded34SStanislav Mekhanoshin } // End anonymous namespace.
70381ded34SStanislav Mekhanoshin
71381ded34SStanislav Mekhanoshin INITIALIZE_PASS_BEGIN(GCNPreRAOptimizations, DEBUG_TYPE,
72381ded34SStanislav Mekhanoshin "AMDGPU Pre-RA optimizations", false, false)
73381ded34SStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
74381ded34SStanislav Mekhanoshin INITIALIZE_PASS_END(GCNPreRAOptimizations, DEBUG_TYPE, "Pre-RA optimizations",
75381ded34SStanislav Mekhanoshin false, false)
76381ded34SStanislav Mekhanoshin
77381ded34SStanislav Mekhanoshin char GCNPreRAOptimizations::ID = 0;
78381ded34SStanislav Mekhanoshin
79381ded34SStanislav Mekhanoshin char &llvm::GCNPreRAOptimizationsID = GCNPreRAOptimizations::ID;
80381ded34SStanislav Mekhanoshin
createGCNPreRAOptimizationsPass()81381ded34SStanislav Mekhanoshin FunctionPass *llvm::createGCNPreRAOptimizationsPass() {
82381ded34SStanislav Mekhanoshin return new GCNPreRAOptimizations();
83381ded34SStanislav Mekhanoshin }
84381ded34SStanislav Mekhanoshin
processReg(Register Reg)85381ded34SStanislav Mekhanoshin bool GCNPreRAOptimizations::processReg(Register Reg) {
86381ded34SStanislav Mekhanoshin MachineInstr *Def0 = nullptr;
87381ded34SStanislav Mekhanoshin MachineInstr *Def1 = nullptr;
88381ded34SStanislav Mekhanoshin uint64_t Init = 0;
891443ba61SVang Thao bool Changed = false;
901443ba61SVang Thao SmallSet<Register, 32> ModifiedRegs;
911443ba61SVang Thao bool IsAGPRDst = TRI->isAGPRClass(MRI->getRegClass(Reg));
92381ded34SStanislav Mekhanoshin
93381ded34SStanislav Mekhanoshin for (MachineInstr &I : MRI->def_instructions(Reg)) {
941443ba61SVang Thao switch (I.getOpcode()) {
951443ba61SVang Thao default:
961443ba61SVang Thao return false;
971443ba61SVang Thao case AMDGPU::V_ACCVGPR_WRITE_B32_e64:
981443ba61SVang Thao break;
991443ba61SVang Thao case AMDGPU::COPY: {
1001443ba61SVang Thao // Some subtargets cannot do an AGPR to AGPR copy directly, and need an
1011443ba61SVang Thao // intermdiate temporary VGPR register. Try to find the defining
1021443ba61SVang Thao // accvgpr_write to avoid temporary registers.
103fbe61fb0SAbinav Puthan Purayil
1041443ba61SVang Thao if (!IsAGPRDst)
105fbe61fb0SAbinav Puthan Purayil return false;
1061443ba61SVang Thao
1071443ba61SVang Thao Register SrcReg = I.getOperand(1).getReg();
1081443ba61SVang Thao
1091443ba61SVang Thao if (!SrcReg.isVirtual())
1101443ba61SVang Thao break;
1111443ba61SVang Thao
1121443ba61SVang Thao // Check if source of copy is from another AGPR.
1131443ba61SVang Thao bool IsAGPRSrc = TRI->isAGPRClass(MRI->getRegClass(SrcReg));
1141443ba61SVang Thao if (!IsAGPRSrc)
1151443ba61SVang Thao break;
1161443ba61SVang Thao
1171443ba61SVang Thao // def_instructions() does not look at subregs so it may give us a
1181443ba61SVang Thao // different instruction that defines the same vreg but different subreg
1191443ba61SVang Thao // so we have to manually check subreg.
1201443ba61SVang Thao Register SrcSubReg = I.getOperand(1).getSubReg();
1211443ba61SVang Thao for (auto &Def : MRI->def_instructions(SrcReg)) {
1221443ba61SVang Thao if (SrcSubReg != Def.getOperand(0).getSubReg())
1231443ba61SVang Thao continue;
1241443ba61SVang Thao
1251443ba61SVang Thao if (Def.getOpcode() == AMDGPU::V_ACCVGPR_WRITE_B32_e64) {
1261443ba61SVang Thao MachineOperand DefSrcMO = Def.getOperand(1);
1271443ba61SVang Thao
1281443ba61SVang Thao // Immediates are not an issue and can be propagated in
1291443ba61SVang Thao // postrapseudos pass. Only handle cases where defining
1301443ba61SVang Thao // accvgpr_write source is a vreg.
1311443ba61SVang Thao if (DefSrcMO.isReg() && DefSrcMO.getReg().isVirtual()) {
1321443ba61SVang Thao // Propagate source reg of accvgpr write to this copy instruction
1331443ba61SVang Thao I.getOperand(1).setReg(DefSrcMO.getReg());
1341443ba61SVang Thao I.getOperand(1).setSubReg(DefSrcMO.getSubReg());
1351443ba61SVang Thao
1361443ba61SVang Thao // Reg uses were changed, collect unique set of registers to update
1371443ba61SVang Thao // live intervals at the end.
1381443ba61SVang Thao ModifiedRegs.insert(DefSrcMO.getReg());
1391443ba61SVang Thao ModifiedRegs.insert(SrcReg);
1401443ba61SVang Thao
1411443ba61SVang Thao Changed = true;
1421443ba61SVang Thao }
1431443ba61SVang Thao
1441443ba61SVang Thao // Found the defining accvgpr_write, stop looking any further.
1451443ba61SVang Thao break;
1461443ba61SVang Thao }
1471443ba61SVang Thao }
1481443ba61SVang Thao break;
1491443ba61SVang Thao }
1501443ba61SVang Thao case AMDGPU::S_MOV_B32:
1511443ba61SVang Thao if (I.getOperand(0).getReg() != Reg || !I.getOperand(1).isImm() ||
1521443ba61SVang Thao I.getNumOperands() != 2)
153381ded34SStanislav Mekhanoshin return false;
154381ded34SStanislav Mekhanoshin
155381ded34SStanislav Mekhanoshin switch (I.getOperand(0).getSubReg()) {
156381ded34SStanislav Mekhanoshin default:
157381ded34SStanislav Mekhanoshin return false;
158381ded34SStanislav Mekhanoshin case AMDGPU::sub0:
159381ded34SStanislav Mekhanoshin if (Def0)
160381ded34SStanislav Mekhanoshin return false;
161381ded34SStanislav Mekhanoshin Def0 = &I;
162381ded34SStanislav Mekhanoshin Init |= I.getOperand(1).getImm() & 0xffffffff;
163381ded34SStanislav Mekhanoshin break;
164381ded34SStanislav Mekhanoshin case AMDGPU::sub1:
165381ded34SStanislav Mekhanoshin if (Def1)
166381ded34SStanislav Mekhanoshin return false;
167381ded34SStanislav Mekhanoshin Def1 = &I;
168381ded34SStanislav Mekhanoshin Init |= static_cast<uint64_t>(I.getOperand(1).getImm()) << 32;
169381ded34SStanislav Mekhanoshin break;
170381ded34SStanislav Mekhanoshin }
1711443ba61SVang Thao break;
1721443ba61SVang Thao }
173381ded34SStanislav Mekhanoshin }
174381ded34SStanislav Mekhanoshin
1751443ba61SVang Thao // For AGPR reg, check if live intervals need to be updated.
1761443ba61SVang Thao if (IsAGPRDst) {
1771443ba61SVang Thao if (Changed) {
1781443ba61SVang Thao for (Register RegToUpdate : ModifiedRegs) {
1791443ba61SVang Thao LIS->removeInterval(RegToUpdate);
1801443ba61SVang Thao LIS->createAndComputeVirtRegInterval(RegToUpdate);
1811443ba61SVang Thao }
1821443ba61SVang Thao }
1831443ba61SVang Thao
1841443ba61SVang Thao return Changed;
1851443ba61SVang Thao }
1861443ba61SVang Thao
1871443ba61SVang Thao // For SGPR reg, check if we can combine instructions.
188381ded34SStanislav Mekhanoshin if (!Def0 || !Def1 || Def0->getParent() != Def1->getParent())
1891443ba61SVang Thao return Changed;
190381ded34SStanislav Mekhanoshin
191381ded34SStanislav Mekhanoshin LLVM_DEBUG(dbgs() << "Combining:\n " << *Def0 << " " << *Def1
192381ded34SStanislav Mekhanoshin << " =>\n");
193381ded34SStanislav Mekhanoshin
194381ded34SStanislav Mekhanoshin if (SlotIndex::isEarlierInstr(LIS->getInstructionIndex(*Def1),
195381ded34SStanislav Mekhanoshin LIS->getInstructionIndex(*Def0)))
196381ded34SStanislav Mekhanoshin std::swap(Def0, Def1);
197381ded34SStanislav Mekhanoshin
198381ded34SStanislav Mekhanoshin LIS->RemoveMachineInstrFromMaps(*Def0);
199381ded34SStanislav Mekhanoshin LIS->RemoveMachineInstrFromMaps(*Def1);
200381ded34SStanislav Mekhanoshin auto NewI = BuildMI(*Def0->getParent(), *Def0, Def0->getDebugLoc(),
201381ded34SStanislav Mekhanoshin TII->get(AMDGPU::S_MOV_B64_IMM_PSEUDO), Reg)
202381ded34SStanislav Mekhanoshin .addImm(Init);
203381ded34SStanislav Mekhanoshin
204381ded34SStanislav Mekhanoshin Def0->eraseFromParent();
205381ded34SStanislav Mekhanoshin Def1->eraseFromParent();
206381ded34SStanislav Mekhanoshin LIS->InsertMachineInstrInMaps(*NewI);
207381ded34SStanislav Mekhanoshin LIS->removeInterval(Reg);
208381ded34SStanislav Mekhanoshin LIS->createAndComputeVirtRegInterval(Reg);
209381ded34SStanislav Mekhanoshin
210381ded34SStanislav Mekhanoshin LLVM_DEBUG(dbgs() << " " << *NewI);
211381ded34SStanislav Mekhanoshin
212381ded34SStanislav Mekhanoshin return true;
213381ded34SStanislav Mekhanoshin }
214381ded34SStanislav Mekhanoshin
runOnMachineFunction(MachineFunction & MF)215381ded34SStanislav Mekhanoshin bool GCNPreRAOptimizations::runOnMachineFunction(MachineFunction &MF) {
216381ded34SStanislav Mekhanoshin if (skipFunction(MF.getFunction()))
217381ded34SStanislav Mekhanoshin return false;
218381ded34SStanislav Mekhanoshin
219381ded34SStanislav Mekhanoshin const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
220381ded34SStanislav Mekhanoshin TII = ST.getInstrInfo();
221381ded34SStanislav Mekhanoshin MRI = &MF.getRegInfo();
222381ded34SStanislav Mekhanoshin LIS = &getAnalysis<LiveIntervals>();
2231443ba61SVang Thao TRI = ST.getRegisterInfo();
224381ded34SStanislav Mekhanoshin
225381ded34SStanislav Mekhanoshin bool Changed = false;
226381ded34SStanislav Mekhanoshin
227381ded34SStanislav Mekhanoshin for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
228381ded34SStanislav Mekhanoshin Register Reg = Register::index2VirtReg(I);
229381ded34SStanislav Mekhanoshin if (!LIS->hasInterval(Reg))
230381ded34SStanislav Mekhanoshin continue;
231381ded34SStanislav Mekhanoshin const TargetRegisterClass *RC = MRI->getRegClass(Reg);
2321443ba61SVang Thao if ((RC->MC->getSizeInBits() != 64 || !TRI->isSGPRClass(RC)) &&
2331443ba61SVang Thao (ST.hasGFX90AInsts() || !TRI->isAGPRClass(RC)))
234381ded34SStanislav Mekhanoshin continue;
2351443ba61SVang Thao
236381ded34SStanislav Mekhanoshin Changed |= processReg(Reg);
237381ded34SStanislav Mekhanoshin }
238381ded34SStanislav Mekhanoshin
239381ded34SStanislav Mekhanoshin return Changed;
240381ded34SStanislav Mekhanoshin }
241