1 //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// \brief Computations in WWM can overwrite values in inactive channels for 12 /// variables that the register allocator thinks are dead. This pass adds fake 13 /// uses of those variables to WWM instructions to make sure that they aren't 14 /// overwritten. 15 /// 16 /// As an example, consider this snippet: 17 /// %vgpr0 = V_MOV_B32_e32 0.0 18 /// if (...) { 19 /// %vgpr1 = ... 20 /// %vgpr2 = WWM killed %vgpr1 21 /// ... = killed %vgpr2 22 /// %vgpr0 = V_MOV_B32_e32 1.0 23 /// } 24 /// ... = %vgpr0 25 /// 26 /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally, 27 /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since 28 /// writing %vgpr1 would only write to channels that would be clobbered by the 29 /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled, 30 /// it would clobber even the inactive channels for which the if-condition is 31 /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use 32 /// of %vgpr0 to the WWM instruction to make sure they aren't allocated to the 33 /// same register. 34 /// 35 /// In general, we need to figure out what registers might have their inactive 36 /// channels which are eventually used accidentally clobbered by a WWM 37 /// instruction. We approximate this using two conditions: 38 /// 39 /// 1. A definition of the variable reaches the WWM instruction. 40 /// 2. The variable would be live at the WWM instruction if all its defs were 41 /// partial defs (i.e. considered as a use), ignoring normal uses. 42 /// 43 /// If a register matches both conditions, then we add an implicit use of it to 44 /// the WWM instruction. Condition #2 is the heart of the matter: every 45 /// definition is really a partial definition, since every VALU instruction is 46 /// implicitly predicated. We can usually ignore this, but WWM forces us not 47 /// to. Condition #1 prevents false positives if the variable is undefined at 48 /// the WWM instruction anyways. This is overly conservative in certain cases, 49 /// especially in uniform control flow, but this is a workaround anyways until 50 /// LLVM gains the notion of predicated uses and definitions of variables. 51 /// 52 //===----------------------------------------------------------------------===// 53 54 #include "AMDGPU.h" 55 #include "AMDGPUSubtarget.h" 56 #include "SIInstrInfo.h" 57 #include "SIRegisterInfo.h" 58 #include "llvm/ADT/DepthFirstIterator.h" 59 #include "llvm/ADT/SparseBitVector.h" 60 #include "llvm/CodeGen/LiveIntervals.h" 61 #include "llvm/CodeGen/MachineFunctionPass.h" 62 #include "llvm/CodeGen/Passes.h" 63 #include "llvm/CodeGen/TargetRegisterInfo.h" 64 65 using namespace llvm; 66 67 #define DEBUG_TYPE "si-fix-wwm-liveness" 68 69 namespace { 70 71 class SIFixWWMLiveness : public MachineFunctionPass { 72 private: 73 LiveIntervals *LIS = nullptr; 74 const SIRegisterInfo *TRI; 75 MachineRegisterInfo *MRI; 76 77 public: 78 static char ID; 79 80 SIFixWWMLiveness() : MachineFunctionPass(ID) { 81 initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry()); 82 } 83 84 bool runOnMachineFunction(MachineFunction &MF) override; 85 86 bool runOnWWMInstruction(MachineInstr &MI); 87 88 void addDefs(const MachineInstr &MI, SparseBitVector<> &set); 89 90 StringRef getPassName() const override { return "SI Fix WWM Liveness"; } 91 92 void getAnalysisUsage(AnalysisUsage &AU) const override { 93 // Should preserve the same set that TwoAddressInstructions does. 94 AU.addPreserved<SlotIndexes>(); 95 AU.addPreserved<LiveIntervals>(); 96 AU.addPreservedID(LiveVariablesID); 97 AU.addPreservedID(MachineLoopInfoID); 98 AU.addPreservedID(MachineDominatorsID); 99 AU.setPreservesCFG(); 100 MachineFunctionPass::getAnalysisUsage(AU); 101 } 102 }; 103 104 } // End anonymous namespace. 105 106 INITIALIZE_PASS(SIFixWWMLiveness, DEBUG_TYPE, 107 "SI fix WWM liveness", false, false) 108 109 char SIFixWWMLiveness::ID = 0; 110 111 char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID; 112 113 FunctionPass *llvm::createSIFixWWMLivenessPass() { 114 return new SIFixWWMLiveness(); 115 } 116 117 void SIFixWWMLiveness::addDefs(const MachineInstr &MI, SparseBitVector<> &Regs) 118 { 119 for (const MachineOperand &Op : MI.defs()) { 120 if (Op.isReg()) { 121 unsigned Reg = Op.getReg(); 122 if (TRI->isVGPR(*MRI, Reg)) 123 Regs.set(Reg); 124 } 125 } 126 } 127 128 bool SIFixWWMLiveness::runOnWWMInstruction(MachineInstr &WWM) { 129 MachineBasicBlock *MBB = WWM.getParent(); 130 131 // Compute the registers that are live out of MI by figuring out which defs 132 // are reachable from MI. 133 SparseBitVector<> LiveOut; 134 135 for (auto II = MachineBasicBlock::iterator(WWM), IE = 136 MBB->end(); II != IE; ++II) { 137 addDefs(*II, LiveOut); 138 } 139 140 for (df_iterator<MachineBasicBlock *> I = ++df_begin(MBB), 141 E = df_end(MBB); 142 I != E; ++I) { 143 for (const MachineInstr &MI : **I) { 144 addDefs(MI, LiveOut); 145 } 146 } 147 148 // Compute the registers that reach MI. 149 SparseBitVector<> Reachable; 150 151 for (auto II = ++MachineBasicBlock::reverse_iterator(WWM), IE = 152 MBB->rend(); II != IE; ++II) { 153 addDefs(*II, Reachable); 154 } 155 156 for (idf_iterator<MachineBasicBlock *> I = ++idf_begin(MBB), 157 E = idf_end(MBB); 158 I != E; ++I) { 159 for (const MachineInstr &MI : **I) { 160 addDefs(MI, Reachable); 161 } 162 } 163 164 // find the intersection, and add implicit uses. 165 LiveOut &= Reachable; 166 167 bool Modified = false; 168 for (unsigned Reg : LiveOut) { 169 WWM.addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true)); 170 if (LIS) { 171 // FIXME: is there a better way to update the live interval? 172 LIS->removeInterval(Reg); 173 LIS->createAndComputeVirtRegInterval(Reg); 174 } 175 Modified = true; 176 } 177 178 return Modified; 179 } 180 181 bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) { 182 bool Modified = false; 183 184 // This doesn't actually need LiveIntervals, but we can preserve them. 185 LIS = getAnalysisIfAvailable<LiveIntervals>(); 186 187 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 188 const SIInstrInfo *TII = ST.getInstrInfo(); 189 190 TRI = &TII->getRegisterInfo(); 191 MRI = &MF.getRegInfo(); 192 193 for (MachineBasicBlock &MBB : MF) { 194 for (MachineInstr &MI : MBB) { 195 if (MI.getOpcode() == AMDGPU::EXIT_WWM) { 196 Modified |= runOnWWMInstruction(MI); 197 } 198 } 199 } 200 201 return Modified; 202 } 203