1*d1af09adSJoe Nash //===- GCNVOPDUtils.cpp - GCN VOPD Utils  ------------------------===//
2*d1af09adSJoe Nash //
3*d1af09adSJoe Nash // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*d1af09adSJoe Nash // See https://llvm.org/LICENSE.txt for license information.
5*d1af09adSJoe Nash // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*d1af09adSJoe Nash //
7*d1af09adSJoe Nash //===----------------------------------------------------------------------===//
8*d1af09adSJoe Nash //
/// \file This file contains the AMDGPU DAG scheduling
/// mutation to pair VOPD instructions back to back. It also contains
/// subroutines useful in the creation of VOPD instructions.
//
13*d1af09adSJoe Nash //===----------------------------------------------------------------------===//
14*d1af09adSJoe Nash 
15*d1af09adSJoe Nash #include "GCNVOPDUtils.h"
16*d1af09adSJoe Nash #include "AMDGPUSubtarget.h"
17*d1af09adSJoe Nash #include "GCNSubtarget.h"
18*d1af09adSJoe Nash #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
19*d1af09adSJoe Nash #include "SIInstrInfo.h"
20*d1af09adSJoe Nash #include "llvm/ADT/STLExtras.h"
21*d1af09adSJoe Nash #include "llvm/ADT/SmallVector.h"
22*d1af09adSJoe Nash #include "llvm/CodeGen/MachineBasicBlock.h"
23*d1af09adSJoe Nash #include "llvm/CodeGen/MachineInstr.h"
24*d1af09adSJoe Nash #include "llvm/CodeGen/MachineOperand.h"
25*d1af09adSJoe Nash #include "llvm/CodeGen/MachineRegisterInfo.h"
26*d1af09adSJoe Nash #include "llvm/CodeGen/MacroFusion.h"
27*d1af09adSJoe Nash #include "llvm/CodeGen/ScheduleDAG.h"
28*d1af09adSJoe Nash #include "llvm/CodeGen/ScheduleDAGMutation.h"
29*d1af09adSJoe Nash #include "llvm/CodeGen/TargetInstrInfo.h"
30*d1af09adSJoe Nash #include "llvm/MC/MCInst.h"
31*d1af09adSJoe Nash 
32*d1af09adSJoe Nash using namespace llvm;
33*d1af09adSJoe Nash 
34*d1af09adSJoe Nash #define DEBUG_TYPE "gcn-vopd-utils"
35*d1af09adSJoe Nash 
/// Check whether \p FirstMI and \p SecondMI satisfy the VOPD register and
/// literal constraints and may therefore be combined into a single VOPD
/// instruction (\p FirstMI as the X component, \p SecondMI as the Y
/// component — see the debug output at the bottom).
bool llvm::checkVOPDRegConstraints(const SIInstrInfo &TII,
                                   const MachineInstr &FirstMI,
                                   const MachineInstr &SecondMI) {
  const MachineFunction *MF = FirstMI.getMF();
  const GCNSubtarget &ST = MF->getSubtarget<GCNSubtarget>();
  const SIRegisterInfo *TRI = dyn_cast<SIRegisterInfo>(ST.getRegisterInfo());
  const MachineRegisterInfo &MRI = MF->getRegInfo();
  // VGPRs are assigned to banks by (register number % NumVGPRBanks); the two
  // components must not read the same bank through the same source port
  // (checked at the bottom of this function).
  const unsigned NumVGPRBanks = 4;
  // Literals also count against scalar bus limit
  SmallVector<const MachineOperand *> UniqueLiterals;
  // Record Op as a literal operand, de-duplicating identical values so each
  // distinct literal is counted only once against the limit.
  auto addLiteral = [&](const MachineOperand &Op) {
    for (auto &Literal : UniqueLiterals) {
      if (Literal->isIdenticalTo(Op))
        return;
    }
    UniqueLiterals.push_back(&Op);
  };
  SmallVector<Register> UniqueScalarRegs;
  // Debug-build sanity check: SecondMI must appear after FirstMI within the
  // same basic block.
  assert([&]() -> bool {
    for (auto MII = MachineBasicBlock::const_iterator(&FirstMI);
         MII != FirstMI.getParent()->instr_end(); ++MII) {
      if (&*MII == &SecondMI)
        return true;
    }
    return false;
  }() && "Expected FirstMI to precede SecondMI");
  // Cannot pair dependent instructions
  for (const auto &Use : SecondMI.uses())
    if (Use.isReg() && FirstMI.modifiesRegister(Use.getReg()))
      return false;

  // Per-component operand record: the VGPR read through each source port
  // (Reg0..Reg2, invalid if unused) and the destination register.
  struct ComponentInfo {
    ComponentInfo(const MachineInstr &MI) : MI(MI) {}
    Register Dst, Reg0, Reg1, Reg2;
    const MachineInstr &MI;
  };
  ComponentInfo CInfo[] = {ComponentInfo(FirstMI), ComponentInfo(SecondMI)};

  // Classify, per opcode, which operands travel over which source port and
  // which operands consume a literal or scalar-bus slot. Port 0 (src0,
  // operand 1) is handled uniformly after the switch.
  for (ComponentInfo &Comp : CInfo) {
    switch (Comp.MI.getOpcode()) {
    case AMDGPU::V_FMAMK_F32:
      // cannot inline the fixed literal in fmamk
      addLiteral(Comp.MI.getOperand(2));
      Comp.Reg2 = Comp.MI.getOperand(3).getReg();
      break;
    case AMDGPU::V_FMAAK_F32:
      // cannot inline the fixed literal in fmaak
      addLiteral(Comp.MI.getOperand(3));
      Comp.Reg1 = Comp.MI.getOperand(2).getReg();
      break;
    case AMDGPU::V_FMAC_F32_e32:
    case AMDGPU::V_DOT2_F32_F16:
    case AMDGPU::V_DOT2_F32_BF16:
      // These also read their destination register (operand 0) as a source,
      // so it occupies port 2 as well.
      Comp.Reg1 = Comp.MI.getOperand(2).getReg();
      Comp.Reg2 = Comp.MI.getOperand(0).getReg();
      break;
    case AMDGPU::V_CNDMASK_B32_e32:
      // The VCC_LO read counts against the scalar-bus limit.
      UniqueScalarRegs.push_back(AMDGPU::VCC_LO);
      Comp.Reg1 = Comp.MI.getOperand(2).getReg();
      break;
    case AMDGPU::V_MOV_B32_e32:
      // Single-source move: only src0, handled below.
      break;
    default:
      Comp.Reg1 = Comp.MI.getOperand(2).getReg();
      break;
    }

    Comp.Dst = Comp.MI.getOperand(0).getReg();

    // src0 (operand 1) may be a VGPR (port 0), a scalar register (scalar
    // bus), or a non-inlinable constant (literal).
    const MachineOperand &Op0 = Comp.MI.getOperand(1);
    if (Op0.isReg()) {
      if (!TRI->isVectorRegister(MRI, Op0.getReg())) {
        if (!is_contained(UniqueScalarRegs, Op0.getReg()))
          UniqueScalarRegs.push_back(Op0.getReg());
      } else
        Comp.Reg0 = Op0.getReg();
    } else {
      if (!TII.isInlineConstant(Comp.MI, 1))
        addLiteral(Op0);
    }
  }

  // At most one distinct literal, and at most two scalar-bus users in total
  // (literals included).
  if (UniqueLiterals.size() > 1)
    return false;
  if ((UniqueLiterals.size() + UniqueScalarRegs.size()) > 2)
    return false;

  // check port 0
  if (CInfo[0].Reg0 && CInfo[1].Reg0 &&
      CInfo[0].Reg0 % NumVGPRBanks == CInfo[1].Reg0 % NumVGPRBanks)
    return false;
  // check port 1
  if (CInfo[0].Reg1 && CInfo[1].Reg1 &&
      CInfo[0].Reg1 % NumVGPRBanks == CInfo[1].Reg1 % NumVGPRBanks)
    return false;
  // check port 2
  if (CInfo[0].Reg2 && CInfo[1].Reg2 &&
      !((CInfo[0].Reg2 ^ CInfo[1].Reg2) & 0x1))
    return false;
  // The two destinations must have opposite parity (one even, one odd
  // register number).
  if (!((CInfo[0].Dst ^ CInfo[1].Dst) & 0x1))
    return false;

  LLVM_DEBUG(dbgs() << "VOPD Reg Constraints Passed\n\tX: " << FirstMI
                    << "\n\tY: " << SecondMI << "\n");
  return true;
}
142*d1af09adSJoe Nash 
143*d1af09adSJoe Nash /// Check if the instr pair, FirstMI and SecondMI, should be scheduled
144*d1af09adSJoe Nash /// together. Given SecondMI, when FirstMI is unspecified, then check if
145*d1af09adSJoe Nash /// SecondMI may be part of a fused pair at all.
shouldScheduleVOPDAdjacent(const TargetInstrInfo & TII,const TargetSubtargetInfo & TSI,const MachineInstr * FirstMI,const MachineInstr & SecondMI)146*d1af09adSJoe Nash static bool shouldScheduleVOPDAdjacent(const TargetInstrInfo &TII,
147*d1af09adSJoe Nash                                        const TargetSubtargetInfo &TSI,
148*d1af09adSJoe Nash                                        const MachineInstr *FirstMI,
149*d1af09adSJoe Nash                                        const MachineInstr &SecondMI) {
150*d1af09adSJoe Nash   const SIInstrInfo &STII = static_cast<const SIInstrInfo &>(TII);
151*d1af09adSJoe Nash   unsigned Opc2 = SecondMI.getOpcode();
152*d1af09adSJoe Nash   auto SecondCanBeVOPD = AMDGPU::getCanBeVOPD(Opc2);
153*d1af09adSJoe Nash 
154*d1af09adSJoe Nash   // One instruction case
155*d1af09adSJoe Nash   if (!FirstMI)
156*d1af09adSJoe Nash     return SecondCanBeVOPD.Y;
157*d1af09adSJoe Nash 
158*d1af09adSJoe Nash   unsigned Opc = FirstMI->getOpcode();
159*d1af09adSJoe Nash   auto FirstCanBeVOPD = AMDGPU::getCanBeVOPD(Opc);
160*d1af09adSJoe Nash 
161*d1af09adSJoe Nash   if (!((FirstCanBeVOPD.X && SecondCanBeVOPD.Y) ||
162*d1af09adSJoe Nash         (FirstCanBeVOPD.Y && SecondCanBeVOPD.X)))
163*d1af09adSJoe Nash     return false;
164*d1af09adSJoe Nash 
165*d1af09adSJoe Nash   return checkVOPDRegConstraints(STII, *FirstMI, SecondMI);
166*d1af09adSJoe Nash }
167*d1af09adSJoe Nash 
168*d1af09adSJoe Nash /// Adapts design from MacroFusion
169*d1af09adSJoe Nash /// Puts valid candidate instructions back-to-back so they can easily
170*d1af09adSJoe Nash /// be turned into VOPD instructions
171*d1af09adSJoe Nash /// Greedily pairs instruction candidates. O(n^2) algorithm.
172*d1af09adSJoe Nash struct VOPDPairingMutation : ScheduleDAGMutation {
173*d1af09adSJoe Nash   ShouldSchedulePredTy shouldScheduleAdjacent; // NOLINT: function pointer
174*d1af09adSJoe Nash 
VOPDPairingMutationVOPDPairingMutation175*d1af09adSJoe Nash   VOPDPairingMutation(
176*d1af09adSJoe Nash       ShouldSchedulePredTy shouldScheduleAdjacent) // NOLINT: function pointer
177*d1af09adSJoe Nash       : shouldScheduleAdjacent(shouldScheduleAdjacent) {}
178*d1af09adSJoe Nash 
applyVOPDPairingMutation179*d1af09adSJoe Nash   void apply(ScheduleDAGInstrs *DAG) override {
180*d1af09adSJoe Nash     const TargetInstrInfo &TII = *DAG->TII;
181*d1af09adSJoe Nash     const GCNSubtarget &ST = DAG->MF.getSubtarget<GCNSubtarget>();
182*d1af09adSJoe Nash     if (!AMDGPU::hasVOPD(ST) || !ST.isWave32()) {
183*d1af09adSJoe Nash       LLVM_DEBUG(dbgs() << "Target does not support VOPDPairingMutation\n");
184*d1af09adSJoe Nash       return;
185*d1af09adSJoe Nash     }
186*d1af09adSJoe Nash 
187*d1af09adSJoe Nash     std::vector<SUnit>::iterator ISUI, JSUI;
188*d1af09adSJoe Nash     for (ISUI = DAG->SUnits.begin(); ISUI != DAG->SUnits.end(); ++ISUI) {
189*d1af09adSJoe Nash       const MachineInstr *IMI = ISUI->getInstr();
190*d1af09adSJoe Nash       if (!shouldScheduleAdjacent(TII, ST, nullptr, *IMI))
191*d1af09adSJoe Nash         continue;
192*d1af09adSJoe Nash       if (!hasLessThanNumFused(*ISUI, 2))
193*d1af09adSJoe Nash         continue;
194*d1af09adSJoe Nash 
195*d1af09adSJoe Nash       for (JSUI = ISUI + 1; JSUI != DAG->SUnits.end(); ++JSUI) {
196*d1af09adSJoe Nash         if (JSUI->isBoundaryNode())
197*d1af09adSJoe Nash           continue;
198*d1af09adSJoe Nash         const MachineInstr *JMI = JSUI->getInstr();
199*d1af09adSJoe Nash         if (!hasLessThanNumFused(*JSUI, 2) ||
200*d1af09adSJoe Nash             !shouldScheduleAdjacent(TII, ST, IMI, *JMI))
201*d1af09adSJoe Nash           continue;
202*d1af09adSJoe Nash         if (fuseInstructionPair(*DAG, *ISUI, *JSUI))
203*d1af09adSJoe Nash           break;
204*d1af09adSJoe Nash       }
205*d1af09adSJoe Nash     }
206*d1af09adSJoe Nash     LLVM_DEBUG(dbgs() << "Completed VOPDPairingMutation\n");
207*d1af09adSJoe Nash   }
208*d1af09adSJoe Nash };
209*d1af09adSJoe Nash 
createVOPDPairingMutation()210*d1af09adSJoe Nash std::unique_ptr<ScheduleDAGMutation> llvm::createVOPDPairingMutation() {
211*d1af09adSJoe Nash   return std::make_unique<VOPDPairingMutation>(shouldScheduleVOPDAdjacent);
212*d1af09adSJoe Nash }
213