1739174c4SStanislav Mekhanoshin //===-- SIFormMemoryClauses.cpp -------------------------------------------===//
2739174c4SStanislav Mekhanoshin //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6739174c4SStanislav Mekhanoshin //
7739174c4SStanislav Mekhanoshin //===----------------------------------------------------------------------===//
8739174c4SStanislav Mekhanoshin //
/// \file This pass extends the live ranges of registers used as pointers in
/// sequences of adjacent SMEM and VMEM instructions if XNACK is enabled. A
/// load that would overwrite a pointer would require breaking the soft clause.
/// Artificially extend the live ranges of the pointer operands by inserting
/// KILL instructions that use them immediately after the soft clause.
14739174c4SStanislav Mekhanoshin ///
15739174c4SStanislav Mekhanoshin //===----------------------------------------------------------------------===//
16739174c4SStanislav Mekhanoshin
17739174c4SStanislav Mekhanoshin #include "AMDGPU.h"
18739174c4SStanislav Mekhanoshin #include "GCNRegPressure.h"
19739174c4SStanislav Mekhanoshin #include "SIMachineFunctionInfo.h"
2005da2fe5SReid Kleckner #include "llvm/InitializePasses.h"
21739174c4SStanislav Mekhanoshin
22739174c4SStanislav Mekhanoshin using namespace llvm;
23739174c4SStanislav Mekhanoshin
24739174c4SStanislav Mekhanoshin #define DEBUG_TYPE "si-form-memory-clauses"
25739174c4SStanislav Mekhanoshin
26739174c4SStanislav Mekhanoshin // Clauses longer then 15 instructions would overflow one of the counters
27739174c4SStanislav Mekhanoshin // and stall. They can stall even earlier if there are outstanding counters.
28739174c4SStanislav Mekhanoshin static cl::opt<unsigned>
29739174c4SStanislav Mekhanoshin MaxClause("amdgpu-max-memory-clause", cl::Hidden, cl::init(15),
30739174c4SStanislav Mekhanoshin cl::desc("Maximum length of a memory clause, instructions"));
31739174c4SStanislav Mekhanoshin
32739174c4SStanislav Mekhanoshin namespace {
33739174c4SStanislav Mekhanoshin
34739174c4SStanislav Mekhanoshin class SIFormMemoryClauses : public MachineFunctionPass {
35739174c4SStanislav Mekhanoshin typedef DenseMap<unsigned, std::pair<unsigned, LaneBitmask>> RegUse;
36739174c4SStanislav Mekhanoshin
37739174c4SStanislav Mekhanoshin public:
38739174c4SStanislav Mekhanoshin static char ID;
39739174c4SStanislav Mekhanoshin
40739174c4SStanislav Mekhanoshin public:
SIFormMemoryClauses()41739174c4SStanislav Mekhanoshin SIFormMemoryClauses() : MachineFunctionPass(ID) {
42739174c4SStanislav Mekhanoshin initializeSIFormMemoryClausesPass(*PassRegistry::getPassRegistry());
43739174c4SStanislav Mekhanoshin }
44739174c4SStanislav Mekhanoshin
45739174c4SStanislav Mekhanoshin bool runOnMachineFunction(MachineFunction &MF) override;
46739174c4SStanislav Mekhanoshin
getPassName() const47739174c4SStanislav Mekhanoshin StringRef getPassName() const override {
48739174c4SStanislav Mekhanoshin return "SI Form memory clauses";
49739174c4SStanislav Mekhanoshin }
50739174c4SStanislav Mekhanoshin
getAnalysisUsage(AnalysisUsage & AU) const51739174c4SStanislav Mekhanoshin void getAnalysisUsage(AnalysisUsage &AU) const override {
52739174c4SStanislav Mekhanoshin AU.addRequired<LiveIntervals>();
53739174c4SStanislav Mekhanoshin AU.setPreservesAll();
54739174c4SStanislav Mekhanoshin MachineFunctionPass::getAnalysisUsage(AU);
55739174c4SStanislav Mekhanoshin }
56739174c4SStanislav Mekhanoshin
getClearedProperties() const57551a69e4SMatt Arsenault MachineFunctionProperties getClearedProperties() const override {
58551a69e4SMatt Arsenault return MachineFunctionProperties().set(
59551a69e4SMatt Arsenault MachineFunctionProperties::Property::IsSSA);
60551a69e4SMatt Arsenault }
61551a69e4SMatt Arsenault
62739174c4SStanislav Mekhanoshin private:
638f14a088SMatt Arsenault bool canBundle(const MachineInstr &MI, const RegUse &Defs,
648f14a088SMatt Arsenault const RegUse &Uses) const;
65477e3fe4SMatt Arsenault bool checkPressure(const MachineInstr &MI, GCNDownwardRPTracker &RPT);
66739174c4SStanislav Mekhanoshin void collectRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses) const;
67739174c4SStanislav Mekhanoshin bool processRegUses(const MachineInstr &MI, RegUse &Defs, RegUse &Uses,
68477e3fe4SMatt Arsenault GCNDownwardRPTracker &RPT);
69739174c4SStanislav Mekhanoshin
705bfbae5cSTom Stellard const GCNSubtarget *ST;
71739174c4SStanislav Mekhanoshin const SIRegisterInfo *TRI;
72739174c4SStanislav Mekhanoshin const MachineRegisterInfo *MRI;
73739174c4SStanislav Mekhanoshin SIMachineFunctionInfo *MFI;
74739174c4SStanislav Mekhanoshin
75739174c4SStanislav Mekhanoshin unsigned LastRecordedOccupancy;
76739174c4SStanislav Mekhanoshin unsigned MaxVGPRs;
77739174c4SStanislav Mekhanoshin unsigned MaxSGPRs;
78739174c4SStanislav Mekhanoshin };
79739174c4SStanislav Mekhanoshin
80739174c4SStanislav Mekhanoshin } // End anonymous namespace.
81739174c4SStanislav Mekhanoshin
82739174c4SStanislav Mekhanoshin INITIALIZE_PASS_BEGIN(SIFormMemoryClauses, DEBUG_TYPE,
83739174c4SStanislav Mekhanoshin "SI Form memory clauses", false, false)
84739174c4SStanislav Mekhanoshin INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
85739174c4SStanislav Mekhanoshin INITIALIZE_PASS_END(SIFormMemoryClauses, DEBUG_TYPE,
86739174c4SStanislav Mekhanoshin "SI Form memory clauses", false, false)
87739174c4SStanislav Mekhanoshin
88739174c4SStanislav Mekhanoshin
89739174c4SStanislav Mekhanoshin char SIFormMemoryClauses::ID = 0;
90739174c4SStanislav Mekhanoshin
91739174c4SStanislav Mekhanoshin char &llvm::SIFormMemoryClausesID = SIFormMemoryClauses::ID;
92739174c4SStanislav Mekhanoshin
createSIFormMemoryClausesPass()93739174c4SStanislav Mekhanoshin FunctionPass *llvm::createSIFormMemoryClausesPass() {
94739174c4SStanislav Mekhanoshin return new SIFormMemoryClauses();
95739174c4SStanislav Mekhanoshin }
96739174c4SStanislav Mekhanoshin
isVMEMClauseInst(const MachineInstr & MI)97739174c4SStanislav Mekhanoshin static bool isVMEMClauseInst(const MachineInstr &MI) {
98739174c4SStanislav Mekhanoshin return SIInstrInfo::isFLAT(MI) || SIInstrInfo::isVMEM(MI);
99739174c4SStanislav Mekhanoshin }
100739174c4SStanislav Mekhanoshin
isSMEMClauseInst(const MachineInstr & MI)101739174c4SStanislav Mekhanoshin static bool isSMEMClauseInst(const MachineInstr &MI) {
102739174c4SStanislav Mekhanoshin return SIInstrInfo::isSMRD(MI);
103739174c4SStanislav Mekhanoshin }
104739174c4SStanislav Mekhanoshin
105739174c4SStanislav Mekhanoshin // There no sense to create store clauses, they do not define anything,
106739174c4SStanislav Mekhanoshin // thus there is nothing to set early-clobber.
isValidClauseInst(const MachineInstr & MI,bool IsVMEMClause)107739174c4SStanislav Mekhanoshin static bool isValidClauseInst(const MachineInstr &MI, bool IsVMEMClause) {
10841877b82SMatt Arsenault assert(!MI.isDebugInstr() && "debug instructions should not reach here");
10941877b82SMatt Arsenault if (MI.isBundled())
110739174c4SStanislav Mekhanoshin return false;
111739174c4SStanislav Mekhanoshin if (!MI.mayLoad() || MI.mayStore())
112739174c4SStanislav Mekhanoshin return false;
1135cf9292cSStanislav Mekhanoshin if (SIInstrInfo::isAtomic(MI))
114739174c4SStanislav Mekhanoshin return false;
115739174c4SStanislav Mekhanoshin if (IsVMEMClause && !isVMEMClauseInst(MI))
116739174c4SStanislav Mekhanoshin return false;
117739174c4SStanislav Mekhanoshin if (!IsVMEMClause && !isSMEMClauseInst(MI))
118739174c4SStanislav Mekhanoshin return false;
119f64f8efeSTim Renouf // If this is a load instruction where the result has been coalesced with an operand, then we cannot clause it.
120f64f8efeSTim Renouf for (const MachineOperand &ResMO : MI.defs()) {
1210c476111SDaniel Sanders Register ResReg = ResMO.getReg();
122f64f8efeSTim Renouf for (const MachineOperand &MO : MI.uses()) {
123f64f8efeSTim Renouf if (!MO.isReg() || MO.isDef())
124f64f8efeSTim Renouf continue;
125f64f8efeSTim Renouf if (MO.getReg() == ResReg)
126f64f8efeSTim Renouf return false;
127f64f8efeSTim Renouf }
128f64f8efeSTim Renouf break; // Only check the first def.
129f64f8efeSTim Renouf }
130739174c4SStanislav Mekhanoshin return true;
131739174c4SStanislav Mekhanoshin }
132739174c4SStanislav Mekhanoshin
getMopState(const MachineOperand & MO)133739174c4SStanislav Mekhanoshin static unsigned getMopState(const MachineOperand &MO) {
134739174c4SStanislav Mekhanoshin unsigned S = 0;
135739174c4SStanislav Mekhanoshin if (MO.isImplicit())
136739174c4SStanislav Mekhanoshin S |= RegState::Implicit;
137739174c4SStanislav Mekhanoshin if (MO.isDead())
138739174c4SStanislav Mekhanoshin S |= RegState::Dead;
139739174c4SStanislav Mekhanoshin if (MO.isUndef())
140739174c4SStanislav Mekhanoshin S |= RegState::Undef;
141739174c4SStanislav Mekhanoshin if (MO.isKill())
142739174c4SStanislav Mekhanoshin S |= RegState::Kill;
143739174c4SStanislav Mekhanoshin if (MO.isEarlyClobber())
144739174c4SStanislav Mekhanoshin S |= RegState::EarlyClobber;
14534978602SJay Foad if (MO.getReg().isPhysical() && MO.isRenamable())
146739174c4SStanislav Mekhanoshin S |= RegState::Renamable;
147739174c4SStanislav Mekhanoshin return S;
148739174c4SStanislav Mekhanoshin }
149739174c4SStanislav Mekhanoshin
150739174c4SStanislav Mekhanoshin // Returns false if there is a use of a def already in the map.
151739174c4SStanislav Mekhanoshin // In this case we must break the clause.
canBundle(const MachineInstr & MI,const RegUse & Defs,const RegUse & Uses) const1528f14a088SMatt Arsenault bool SIFormMemoryClauses::canBundle(const MachineInstr &MI, const RegUse &Defs,
1538f14a088SMatt Arsenault const RegUse &Uses) const {
154739174c4SStanislav Mekhanoshin // Check interference with defs.
155739174c4SStanislav Mekhanoshin for (const MachineOperand &MO : MI.operands()) {
156739174c4SStanislav Mekhanoshin // TODO: Prologue/Epilogue Insertion pass does not process bundled
157739174c4SStanislav Mekhanoshin // instructions.
158739174c4SStanislav Mekhanoshin if (MO.isFI())
159739174c4SStanislav Mekhanoshin return false;
160739174c4SStanislav Mekhanoshin
161739174c4SStanislav Mekhanoshin if (!MO.isReg())
162739174c4SStanislav Mekhanoshin continue;
163739174c4SStanislav Mekhanoshin
1640c476111SDaniel Sanders Register Reg = MO.getReg();
165739174c4SStanislav Mekhanoshin
166739174c4SStanislav Mekhanoshin // If it is tied we will need to write same register as we read.
167739174c4SStanislav Mekhanoshin if (MO.isTied())
168739174c4SStanislav Mekhanoshin return false;
169739174c4SStanislav Mekhanoshin
1708f14a088SMatt Arsenault const RegUse &Map = MO.isDef() ? Uses : Defs;
171739174c4SStanislav Mekhanoshin auto Conflict = Map.find(Reg);
172739174c4SStanislav Mekhanoshin if (Conflict == Map.end())
173739174c4SStanislav Mekhanoshin continue;
174739174c4SStanislav Mekhanoshin
17534978602SJay Foad if (Reg.isPhysical())
176739174c4SStanislav Mekhanoshin return false;
177739174c4SStanislav Mekhanoshin
178739174c4SStanislav Mekhanoshin LaneBitmask Mask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
179739174c4SStanislav Mekhanoshin if ((Conflict->second.second & Mask).any())
180739174c4SStanislav Mekhanoshin return false;
181739174c4SStanislav Mekhanoshin }
182739174c4SStanislav Mekhanoshin
183739174c4SStanislav Mekhanoshin return true;
184739174c4SStanislav Mekhanoshin }
185739174c4SStanislav Mekhanoshin
186739174c4SStanislav Mekhanoshin // Since all defs in the clause are early clobber we can run out of registers.
187739174c4SStanislav Mekhanoshin // Function returns false if pressure would hit the limit if instruction is
188739174c4SStanislav Mekhanoshin // bundled into a memory clause.
checkPressure(const MachineInstr & MI,GCNDownwardRPTracker & RPT)189739174c4SStanislav Mekhanoshin bool SIFormMemoryClauses::checkPressure(const MachineInstr &MI,
190477e3fe4SMatt Arsenault GCNDownwardRPTracker &RPT) {
191739174c4SStanislav Mekhanoshin // NB: skip advanceBeforeNext() call. Since all defs will be marked
192739174c4SStanislav Mekhanoshin // early-clobber they will all stay alive at least to the end of the
193477e3fe4SMatt Arsenault // clause. Therefor we should not decrease pressure even if load
194477e3fe4SMatt Arsenault // pointer becomes dead and could otherwise be reused for destination.
195739174c4SStanislav Mekhanoshin RPT.advanceToNext();
196477e3fe4SMatt Arsenault GCNRegPressure MaxPressure = RPT.moveMaxPressure();
197477e3fe4SMatt Arsenault unsigned Occupancy = MaxPressure.getOccupancy(*ST);
198e3c6fa36SMatt Arsenault
199e3c6fa36SMatt Arsenault // Don't push over half the register budget. We don't want to introduce
200e3c6fa36SMatt Arsenault // spilling just to form a soft clause.
201e3c6fa36SMatt Arsenault //
202e3c6fa36SMatt Arsenault // FIXME: This pressure check is fundamentally broken. First, this is checking
203e3c6fa36SMatt Arsenault // the global pressure, not the pressure at this specific point in the
204e3c6fa36SMatt Arsenault // program. Second, it's not accounting for the increased liveness of the use
205e3c6fa36SMatt Arsenault // operands due to the early clobber we will introduce. Third, the pressure
206e3c6fa36SMatt Arsenault // tracking does not account for the alignment requirements for SGPRs, or the
207e3c6fa36SMatt Arsenault // fragmentation of registers the allocator will need to satisfy.
208477e3fe4SMatt Arsenault if (Occupancy >= MFI->getMinAllowedOccupancy() &&
209a8d9d507SStanislav Mekhanoshin MaxPressure.getVGPRNum(ST->hasGFX90AInsts()) <= MaxVGPRs / 2 &&
210e3c6fa36SMatt Arsenault MaxPressure.getSGPRNum() <= MaxSGPRs / 2) {
211477e3fe4SMatt Arsenault LastRecordedOccupancy = Occupancy;
2121e377a27SMatt Arsenault return true;
213739174c4SStanislav Mekhanoshin }
214477e3fe4SMatt Arsenault return false;
215477e3fe4SMatt Arsenault }
216739174c4SStanislav Mekhanoshin
217739174c4SStanislav Mekhanoshin // Collect register defs and uses along with their lane masks and states.
collectRegUses(const MachineInstr & MI,RegUse & Defs,RegUse & Uses) const218739174c4SStanislav Mekhanoshin void SIFormMemoryClauses::collectRegUses(const MachineInstr &MI,
219739174c4SStanislav Mekhanoshin RegUse &Defs, RegUse &Uses) const {
220739174c4SStanislav Mekhanoshin for (const MachineOperand &MO : MI.operands()) {
221739174c4SStanislav Mekhanoshin if (!MO.isReg())
222739174c4SStanislav Mekhanoshin continue;
2230c476111SDaniel Sanders Register Reg = MO.getReg();
224739174c4SStanislav Mekhanoshin if (!Reg)
225739174c4SStanislav Mekhanoshin continue;
226739174c4SStanislav Mekhanoshin
22734978602SJay Foad LaneBitmask Mask = Reg.isVirtual()
2282bea69bfSDaniel Sanders ? TRI->getSubRegIndexLaneMask(MO.getSubReg())
2292bea69bfSDaniel Sanders : LaneBitmask::getAll();
230739174c4SStanislav Mekhanoshin RegUse &Map = MO.isDef() ? Defs : Uses;
231739174c4SStanislav Mekhanoshin
232739174c4SStanislav Mekhanoshin auto Loc = Map.find(Reg);
233739174c4SStanislav Mekhanoshin unsigned State = getMopState(MO);
234739174c4SStanislav Mekhanoshin if (Loc == Map.end()) {
235739174c4SStanislav Mekhanoshin Map[Reg] = std::make_pair(State, Mask);
236739174c4SStanislav Mekhanoshin } else {
237739174c4SStanislav Mekhanoshin Loc->second.first |= State;
238739174c4SStanislav Mekhanoshin Loc->second.second |= Mask;
239739174c4SStanislav Mekhanoshin }
240739174c4SStanislav Mekhanoshin }
241739174c4SStanislav Mekhanoshin }
242739174c4SStanislav Mekhanoshin
243739174c4SStanislav Mekhanoshin // Check register def/use conflicts, occupancy limits and collect def/use maps.
244*6527b2a4SSebastian Neubauer // Return true if instruction can be bundled with previous. If it cannot
245739174c4SStanislav Mekhanoshin // def/use maps are not updated.
processRegUses(const MachineInstr & MI,RegUse & Defs,RegUse & Uses,GCNDownwardRPTracker & RPT)246477e3fe4SMatt Arsenault bool SIFormMemoryClauses::processRegUses(const MachineInstr &MI,
247477e3fe4SMatt Arsenault RegUse &Defs, RegUse &Uses,
248477e3fe4SMatt Arsenault GCNDownwardRPTracker &RPT) {
249739174c4SStanislav Mekhanoshin if (!canBundle(MI, Defs, Uses))
250739174c4SStanislav Mekhanoshin return false;
251739174c4SStanislav Mekhanoshin
252477e3fe4SMatt Arsenault if (!checkPressure(MI, RPT))
253739174c4SStanislav Mekhanoshin return false;
254739174c4SStanislav Mekhanoshin
255739174c4SStanislav Mekhanoshin collectRegUses(MI, Defs, Uses);
256739174c4SStanislav Mekhanoshin return true;
257739174c4SStanislav Mekhanoshin }
258739174c4SStanislav Mekhanoshin
runOnMachineFunction(MachineFunction & MF)259739174c4SStanislav Mekhanoshin bool SIFormMemoryClauses::runOnMachineFunction(MachineFunction &MF) {
260739174c4SStanislav Mekhanoshin if (skipFunction(MF.getFunction()))
261739174c4SStanislav Mekhanoshin return false;
262739174c4SStanislav Mekhanoshin
2635bfbae5cSTom Stellard ST = &MF.getSubtarget<GCNSubtarget>();
264739174c4SStanislav Mekhanoshin if (!ST->isXNACKEnabled())
265739174c4SStanislav Mekhanoshin return false;
266739174c4SStanislav Mekhanoshin
267739174c4SStanislav Mekhanoshin const SIInstrInfo *TII = ST->getInstrInfo();
268739174c4SStanislav Mekhanoshin TRI = ST->getRegisterInfo();
269739174c4SStanislav Mekhanoshin MRI = &MF.getRegInfo();
270739174c4SStanislav Mekhanoshin MFI = MF.getInfo<SIMachineFunctionInfo>();
271739174c4SStanislav Mekhanoshin LiveIntervals *LIS = &getAnalysis<LiveIntervals>();
272739174c4SStanislav Mekhanoshin SlotIndexes *Ind = LIS->getSlotIndexes();
273739174c4SStanislav Mekhanoshin bool Changed = false;
274739174c4SStanislav Mekhanoshin
275739174c4SStanislav Mekhanoshin MaxVGPRs = TRI->getAllocatableSet(MF, &AMDGPU::VGPR_32RegClass).count();
276739174c4SStanislav Mekhanoshin MaxSGPRs = TRI->getAllocatableSet(MF, &AMDGPU::SGPR_32RegClass).count();
2777fecdf36STim Renouf unsigned FuncMaxClause = AMDGPU::getIntegerAttribute(
2787fecdf36STim Renouf MF.getFunction(), "amdgpu-max-memory-clause", MaxClause);
279739174c4SStanislav Mekhanoshin
280739174c4SStanislav Mekhanoshin for (MachineBasicBlock &MBB : MF) {
2815b648df1SChangpeng Fang GCNDownwardRPTracker RPT(*LIS);
282739174c4SStanislav Mekhanoshin MachineBasicBlock::instr_iterator Next;
283739174c4SStanislav Mekhanoshin for (auto I = MBB.instr_begin(), E = MBB.instr_end(); I != E; I = Next) {
284739174c4SStanislav Mekhanoshin MachineInstr &MI = *I;
285739174c4SStanislav Mekhanoshin Next = std::next(I);
286739174c4SStanislav Mekhanoshin
28781b2c23bSMatt Arsenault if (MI.isMetaInstruction())
28841877b82SMatt Arsenault continue;
28941877b82SMatt Arsenault
290739174c4SStanislav Mekhanoshin bool IsVMEM = isVMEMClauseInst(MI);
291739174c4SStanislav Mekhanoshin
292739174c4SStanislav Mekhanoshin if (!isValidClauseInst(MI, IsVMEM))
293739174c4SStanislav Mekhanoshin continue;
294739174c4SStanislav Mekhanoshin
2955b648df1SChangpeng Fang if (!RPT.getNext().isValid())
296739174c4SStanislav Mekhanoshin RPT.reset(MI);
2975b648df1SChangpeng Fang else { // Advance the state to the current MI.
2985b648df1SChangpeng Fang RPT.advance(MachineBasicBlock::const_iterator(MI));
2995b648df1SChangpeng Fang RPT.advanceBeforeNext();
3005b648df1SChangpeng Fang }
301739174c4SStanislav Mekhanoshin
3025b648df1SChangpeng Fang const GCNRPTracker::LiveRegSet LiveRegsCopy(RPT.getLiveRegs());
3035b648df1SChangpeng Fang RegUse Defs, Uses;
304477e3fe4SMatt Arsenault if (!processRegUses(MI, Defs, Uses, RPT)) {
3055b648df1SChangpeng Fang RPT.reset(MI, &LiveRegsCopy);
306739174c4SStanislav Mekhanoshin continue;
3075b648df1SChangpeng Fang }
308739174c4SStanislav Mekhanoshin
30981b2c23bSMatt Arsenault MachineBasicBlock::iterator LastClauseInst = Next;
310739174c4SStanislav Mekhanoshin unsigned Length = 1;
3117fecdf36STim Renouf for ( ; Next != E && Length < FuncMaxClause; ++Next) {
31281b2c23bSMatt Arsenault // Debug instructions should not change the kill insertion.
31381b2c23bSMatt Arsenault if (Next->isMetaInstruction())
31441877b82SMatt Arsenault continue;
31541877b82SMatt Arsenault
316739174c4SStanislav Mekhanoshin if (!isValidClauseInst(*Next, IsVMEM))
317739174c4SStanislav Mekhanoshin break;
318739174c4SStanislav Mekhanoshin
319739174c4SStanislav Mekhanoshin // A load from pointer which was loaded inside the same bundle is an
320739174c4SStanislav Mekhanoshin // impossible clause because we will need to write and read the same
321739174c4SStanislav Mekhanoshin // register inside. In this case processRegUses will return false.
322477e3fe4SMatt Arsenault if (!processRegUses(*Next, Defs, Uses, RPT))
323739174c4SStanislav Mekhanoshin break;
324739174c4SStanislav Mekhanoshin
32581b2c23bSMatt Arsenault LastClauseInst = Next;
326739174c4SStanislav Mekhanoshin ++Length;
327739174c4SStanislav Mekhanoshin }
3285b648df1SChangpeng Fang if (Length < 2) {
3295b648df1SChangpeng Fang RPT.reset(MI, &LiveRegsCopy);
330739174c4SStanislav Mekhanoshin continue;
3315b648df1SChangpeng Fang }
332739174c4SStanislav Mekhanoshin
333739174c4SStanislav Mekhanoshin Changed = true;
334739174c4SStanislav Mekhanoshin MFI->limitOccupancy(LastRecordedOccupancy);
335739174c4SStanislav Mekhanoshin
33681b2c23bSMatt Arsenault assert(!LastClauseInst->isMetaInstruction());
337739174c4SStanislav Mekhanoshin
33881b2c23bSMatt Arsenault SlotIndex ClauseLiveInIdx = LIS->getInstructionIndex(MI);
33981b2c23bSMatt Arsenault SlotIndex ClauseLiveOutIdx =
34081b2c23bSMatt Arsenault LIS->getInstructionIndex(*LastClauseInst).getNextIndex();
3415b648df1SChangpeng Fang
34281b2c23bSMatt Arsenault // Track the last inserted kill.
34381b2c23bSMatt Arsenault MachineInstrBuilder Kill;
34441877b82SMatt Arsenault
34581b2c23bSMatt Arsenault // Insert one kill per register, with operands covering all necessary
34681b2c23bSMatt Arsenault // subregisters.
34781b2c23bSMatt Arsenault for (auto &&R : Uses) {
34881b2c23bSMatt Arsenault Register Reg = R.first;
34981b2c23bSMatt Arsenault if (Reg.isPhysical())
35081b2c23bSMatt Arsenault continue;
35181b2c23bSMatt Arsenault
35281b2c23bSMatt Arsenault // Collect the register operands we should extend the live ranges of.
35381b2c23bSMatt Arsenault SmallVector<std::tuple<unsigned, unsigned>> KillOps;
35481b2c23bSMatt Arsenault const LiveInterval &LI = LIS->getInterval(R.first);
35581b2c23bSMatt Arsenault
35681b2c23bSMatt Arsenault if (!LI.hasSubRanges()) {
35781b2c23bSMatt Arsenault if (!LI.liveAt(ClauseLiveOutIdx)) {
35881b2c23bSMatt Arsenault KillOps.emplace_back(R.second.first | RegState::Kill,
35981b2c23bSMatt Arsenault AMDGPU::NoSubRegister);
36081b2c23bSMatt Arsenault }
36181b2c23bSMatt Arsenault } else {
36281b2c23bSMatt Arsenault LaneBitmask KilledMask;
36381b2c23bSMatt Arsenault for (const LiveInterval::SubRange &SR : LI.subranges()) {
36481b2c23bSMatt Arsenault if (SR.liveAt(ClauseLiveInIdx) && !SR.liveAt(ClauseLiveOutIdx))
36581b2c23bSMatt Arsenault KilledMask |= SR.LaneMask;
36681b2c23bSMatt Arsenault }
36781b2c23bSMatt Arsenault
36881b2c23bSMatt Arsenault if (KilledMask.none())
36981b2c23bSMatt Arsenault continue;
37081b2c23bSMatt Arsenault
37181b2c23bSMatt Arsenault SmallVector<unsigned> KilledIndexes;
37281b2c23bSMatt Arsenault bool Success = TRI->getCoveringSubRegIndexes(
37381b2c23bSMatt Arsenault *MRI, MRI->getRegClass(Reg), KilledMask, KilledIndexes);
37481b2c23bSMatt Arsenault (void)Success;
37581b2c23bSMatt Arsenault assert(Success && "Failed to find subregister mask to cover lanes");
37681b2c23bSMatt Arsenault for (unsigned SubReg : KilledIndexes) {
37781b2c23bSMatt Arsenault KillOps.emplace_back(R.second.first | RegState::Kill, SubReg);
37881b2c23bSMatt Arsenault }
37981b2c23bSMatt Arsenault }
38081b2c23bSMatt Arsenault
38181b2c23bSMatt Arsenault if (KillOps.empty())
38281b2c23bSMatt Arsenault continue;
38381b2c23bSMatt Arsenault
38481b2c23bSMatt Arsenault // We only want to extend the live ranges of used registers. If they
38581b2c23bSMatt Arsenault // already have existing uses beyond the bundle, we don't need the kill.
38681b2c23bSMatt Arsenault //
38781b2c23bSMatt Arsenault // It's possible all of the use registers were already live past the
38881b2c23bSMatt Arsenault // bundle.
38981b2c23bSMatt Arsenault Kill = BuildMI(*MI.getParent(), std::next(LastClauseInst),
39081b2c23bSMatt Arsenault DebugLoc(), TII->get(AMDGPU::KILL));
39181b2c23bSMatt Arsenault for (auto &Op : KillOps)
39281b2c23bSMatt Arsenault Kill.addUse(Reg, std::get<0>(Op), std::get<1>(Op));
39381b2c23bSMatt Arsenault Ind->insertMachineInstrInMaps(*Kill);
39481b2c23bSMatt Arsenault }
39581b2c23bSMatt Arsenault
39681b2c23bSMatt Arsenault if (!Kill) {
39781b2c23bSMatt Arsenault RPT.reset(MI, &LiveRegsCopy);
39841877b82SMatt Arsenault continue;
39941877b82SMatt Arsenault }
40041877b82SMatt Arsenault
40181b2c23bSMatt Arsenault // Restore the state after processing the end of the bundle.
40281b2c23bSMatt Arsenault RPT.reset(*Kill, &LiveRegsCopy);
403739174c4SStanislav Mekhanoshin
404739174c4SStanislav Mekhanoshin for (auto &&R : Defs) {
40534978602SJay Foad Register Reg = R.first;
406739174c4SStanislav Mekhanoshin Uses.erase(Reg);
40734978602SJay Foad if (Reg.isPhysical())
408739174c4SStanislav Mekhanoshin continue;
409739174c4SStanislav Mekhanoshin LIS->removeInterval(Reg);
410739174c4SStanislav Mekhanoshin LIS->createAndComputeVirtRegInterval(Reg);
411739174c4SStanislav Mekhanoshin }
412739174c4SStanislav Mekhanoshin
413739174c4SStanislav Mekhanoshin for (auto &&R : Uses) {
41434978602SJay Foad Register Reg = R.first;
41534978602SJay Foad if (Reg.isPhysical())
416739174c4SStanislav Mekhanoshin continue;
417739174c4SStanislav Mekhanoshin LIS->removeInterval(Reg);
418739174c4SStanislav Mekhanoshin LIS->createAndComputeVirtRegInterval(Reg);
419739174c4SStanislav Mekhanoshin }
420739174c4SStanislav Mekhanoshin }
421739174c4SStanislav Mekhanoshin }
422739174c4SStanislav Mekhanoshin
423739174c4SStanislav Mekhanoshin return Changed;
424739174c4SStanislav Mekhanoshin }
425