1 //===-- SIFixWWMLiveness.cpp - Fix WWM live intervals ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// Computations in WWM can overwrite values in inactive channels for 12 /// variables that the register allocator thinks are dead. This pass adds fake 13 /// uses of those variables to their def(s) to make sure that they aren't 14 /// overwritten. 15 /// 16 /// As an example, consider this snippet: 17 /// %vgpr0 = V_MOV_B32_e32 0.0 18 /// if (...) { 19 /// %vgpr1 = ... 20 /// %vgpr2 = WWM killed %vgpr1 21 /// ... = killed %vgpr2 22 /// %vgpr0 = V_MOV_B32_e32 1.0 23 /// } 24 /// ... = %vgpr0 25 /// 26 /// The live intervals of %vgpr0 don't overlap with those of %vgpr1. Normally, 27 /// we can safely allocate %vgpr0 and %vgpr1 in the same register, since 28 /// writing %vgpr1 would only write to channels that would be clobbered by the 29 /// second write to %vgpr0 anyways. But if %vgpr1 is written with WWM enabled, 30 /// it would clobber even the inactive channels for which the if-condition is 31 /// false, for which %vgpr0 is supposed to be 0. This pass adds an implicit use 32 /// of %vgpr0 to its def to make sure they aren't allocated to the 33 /// same register. 34 /// 35 /// In general, we need to figure out what registers might have their inactive 36 /// channels which are eventually used accidentally clobbered by a WWM 37 /// instruction. We do that by spotting three separate cases of registers: 38 /// 39 /// 1. A "then phi": the value resulting from phi elimination of a phi node at 40 /// the end of an if..endif. If there is WWM code in the "then", then we 41 /// make the def at the end of the "then" branch a partial def by adding an 42 /// implicit use of the register. 43 /// 44 /// 2. A "loop exit register": a value written inside a loop but used outside the 45 /// loop, where there is WWM code inside the loop (the case in the example 46 /// above). We add an implicit_def of the register in the loop pre-header, 47 /// and make the original def a partial def by adding an implicit use of the 48 /// register. 49 /// 50 /// 3. A "loop exit phi": the value resulting from phi elimination of a phi node 51 /// in a loop header. If there is WWM code inside the loop, then we make all 52 /// defs inside the loop partial defs by adding an implicit use of the 53 /// register on each one. 54 /// 55 /// Note that we do not need to consider an if..else..endif phi. We only need to 56 /// consider non-uniform control flow, and control flow structurization would 57 /// have transformed a non-uniform if..else..endif into two if..endifs. 58 /// 59 /// The analysis to detect these cases relies on a property of the MIR 60 /// arising from this pass running straight after PHIElimination and before any 61 /// coalescing: that any virtual register with more than one definition must be 62 /// the new register added to lower a phi node by PHIElimination. 63 /// 64 /// FIXME: We should detect whether a register in one of the above categories is 65 /// already live at the WWM code before deciding to add the implicit uses to 66 /// synthesize its liveness. 67 /// 68 /// FIXME: I believe this whole scheme may be flawed due to the possibility of 69 /// the register allocator doing live interval splitting. 70 /// 71 //===----------------------------------------------------------------------===// 72 73 #include "AMDGPU.h" 74 #include "AMDGPUSubtarget.h" 75 #include "SIInstrInfo.h" 76 #include "SIRegisterInfo.h" 77 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 78 #include "llvm/ADT/DepthFirstIterator.h" 79 #include "llvm/ADT/SparseBitVector.h" 80 #include "llvm/CodeGen/LiveIntervals.h" 81 #include "llvm/CodeGen/MachineDominators.h" 82 #include "llvm/CodeGen/MachineFunctionPass.h" 83 #include "llvm/CodeGen/MachineLoopInfo.h" 84 #include "llvm/CodeGen/Passes.h" 85 #include "llvm/CodeGen/TargetRegisterInfo.h" 86 87 using namespace llvm; 88 89 #define DEBUG_TYPE "si-fix-wwm-liveness" 90 91 namespace { 92 93 class SIFixWWMLiveness : public MachineFunctionPass { 94 private: 95 MachineDominatorTree *DomTree; 96 MachineLoopInfo *LoopInfo; 97 LiveIntervals *LIS = nullptr; 98 const SIInstrInfo *TII; 99 const SIRegisterInfo *TRI; 100 MachineRegisterInfo *MRI; 101 102 std::vector<MachineInstr *> WWMs; 103 std::vector<MachineOperand *> ThenDefs; 104 std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopExitDefs; 105 std::vector<std::pair<MachineOperand *, MachineLoop *>> LoopPhiDefs; 106 107 public: 108 static char ID; 109 110 SIFixWWMLiveness() : MachineFunctionPass(ID) { 111 initializeSIFixWWMLivenessPass(*PassRegistry::getPassRegistry()); 112 } 113 114 bool runOnMachineFunction(MachineFunction &MF) override; 115 116 StringRef getPassName() const override { return "SI Fix WWM Liveness"; } 117 118 void getAnalysisUsage(AnalysisUsage &AU) const override { 119 AU.addRequiredID(MachineDominatorsID); 120 AU.addRequiredID(MachineLoopInfoID); 121 // Should preserve the same set that TwoAddressInstructions does. 122 AU.addPreserved<SlotIndexes>(); 123 AU.addPreserved<LiveIntervals>(); 124 AU.addPreservedID(LiveVariablesID); 125 AU.addPreservedID(MachineLoopInfoID); 126 AU.addPreservedID(MachineDominatorsID); 127 AU.setPreservesCFG(); 128 MachineFunctionPass::getAnalysisUsage(AU); 129 } 130 131 private: 132 void processDef(MachineOperand &DefOpnd); 133 bool processThenDef(MachineOperand *DefOpnd); 134 bool processLoopExitDef(MachineOperand *DefOpnd, MachineLoop *Loop); 135 bool processLoopPhiDef(MachineOperand *DefOpnd, MachineLoop *Loop); 136 }; 137 138 } // End anonymous namespace. 139 140 INITIALIZE_PASS_BEGIN(SIFixWWMLiveness, DEBUG_TYPE, 141 "SI fix WWM liveness", false, false) 142 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 143 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 144 INITIALIZE_PASS_END(SIFixWWMLiveness, DEBUG_TYPE, 145 "SI fix WWM liveness", false, false) 146 147 char SIFixWWMLiveness::ID = 0; 148 149 char &llvm::SIFixWWMLivenessID = SIFixWWMLiveness::ID; 150 151 FunctionPass *llvm::createSIFixWWMLivenessPass() { 152 return new SIFixWWMLiveness(); 153 } 154 155 bool SIFixWWMLiveness::runOnMachineFunction(MachineFunction &MF) { 156 LLVM_DEBUG(dbgs() << "SIFixWWMLiveness: function " << MF.getName() << "\n"); 157 bool Modified = false; 158 159 // This doesn't actually need LiveIntervals, but we can preserve them. 160 LIS = getAnalysisIfAvailable<LiveIntervals>(); 161 162 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 163 164 TII = ST.getInstrInfo(); 165 TRI = &TII->getRegisterInfo(); 166 MRI = &MF.getRegInfo(); 167 168 DomTree = &getAnalysis<MachineDominatorTree>(); 169 LoopInfo = &getAnalysis<MachineLoopInfo>(); 170 171 // Scan the function to find the WWM sections and the candidate registers for 172 // having liveness modified. 173 for (MachineBasicBlock &MBB : MF) { 174 for (MachineInstr &MI : MBB) { 175 if (MI.getOpcode() == AMDGPU::EXIT_WWM) 176 WWMs.push_back(&MI); 177 else { 178 for (MachineOperand &DefOpnd : MI.defs()) { 179 if (DefOpnd.isReg()) { 180 unsigned Reg = DefOpnd.getReg(); 181 if (TRI->isVGPR(*MRI, Reg)) 182 processDef(DefOpnd); 183 } 184 } 185 } 186 } 187 } 188 if (!WWMs.empty()) { 189 // Synthesize liveness over WWM sections as required. 190 for (auto ThenDef : ThenDefs) 191 Modified |= processThenDef(ThenDef); 192 for (auto LoopExitDef : LoopExitDefs) 193 Modified |= processLoopExitDef(LoopExitDef.first, LoopExitDef.second); 194 for (auto LoopPhiDef : LoopPhiDefs) 195 Modified |= processLoopPhiDef(LoopPhiDef.first, LoopPhiDef.second); 196 } 197 198 WWMs.clear(); 199 ThenDefs.clear(); 200 LoopExitDefs.clear(); 201 LoopPhiDefs.clear(); 202 203 return Modified; 204 } 205 206 // During the function scan, process an operand that defines a VGPR. 207 // This categorizes the register and puts it in the appropriate list for later 208 // use when processing a WWM section. 209 void SIFixWWMLiveness::processDef(MachineOperand &DefOpnd) { 210 unsigned Reg = DefOpnd.getReg(); 211 // Get all the defining instructions. For convenience, make Defs[0] the def 212 // we are on now. 213 SmallVector<const MachineInstr *, 4> Defs; 214 Defs.push_back(DefOpnd.getParent()); 215 for (auto &MI : MRI->def_instructions(Reg)) { 216 if (&MI != DefOpnd.getParent()) 217 Defs.push_back(&MI); 218 } 219 // Check whether this def dominates all the others. If not, ignore this def. 220 // Either it is going to be processed when the scan encounters its other def 221 // that dominates all defs, or there is no def that dominates all others. 222 // The latter case is an eliminated phi from an if..else..endif or similar, 223 // which must be for uniform control flow so can be ignored. 224 // Because this pass runs shortly after PHIElimination, we assume that any 225 // multi-def register is a lowered phi, and thus has each def in a separate 226 // basic block. 227 for (unsigned I = 1; I != Defs.size(); ++I) { 228 if (!DomTree->dominates(Defs[0]->getParent(), Defs[I]->getParent())) 229 return; 230 } 231 // Check for the case of an if..endif lowered phi: It has two defs, one 232 // dominates the other, and there is a single use in a successor of the 233 // dominant def. 234 // Later we will spot any WWM code inside 235 // the "then" clause and turn the second def into a partial def so its 236 // liveness goes through the WWM code in the "then" clause. 237 if (Defs.size() == 2) { 238 auto DomDefBlock = Defs[0]->getParent(); 239 if (DomDefBlock->succ_size() == 2 && MRI->hasOneUse(Reg)) { 240 auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent(); 241 for (auto Succ : DomDefBlock->successors()) { 242 if (Succ == UseBlock) { 243 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << " is a then phi reg\n"); 244 ThenDefs.push_back(&DefOpnd); 245 return; 246 } 247 } 248 } 249 } 250 // Check for the case of a non-lowered-phi register (single def) that exits 251 // a loop, that is, it has a use that is outside a loop that the def is 252 // inside. We find the outermost loop that the def is inside but a use is 253 // outside. Later we will spot any WWM code inside that loop and then make 254 // the def a partial def so its liveness goes round the loop and through the 255 // WWM code. 256 if (Defs.size() == 1) { 257 auto Loop = LoopInfo->getLoopFor(Defs[0]->getParent()); 258 if (!Loop) 259 return; 260 bool IsLoopExit = false; 261 for (auto &Use : MRI->use_instructions(Reg)) { 262 auto UseBlock = Use.getParent(); 263 if (Loop->contains(UseBlock)) 264 continue; 265 IsLoopExit = true; 266 while (auto Parent = Loop->getParentLoop()) { 267 if (Parent->contains(UseBlock)) 268 break; 269 Loop = Parent; 270 } 271 } 272 if (!IsLoopExit) 273 return; 274 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) 275 << " is a loop exit reg with loop header at " 276 << "bb." << Loop->getHeader()->getNumber() << "\n"); 277 LoopExitDefs.push_back(std::pair<MachineOperand *, MachineLoop *>( 278 &DefOpnd, Loop)); 279 return; 280 } 281 // Check for the case of a lowered single-preheader-loop phi, that is, a 282 // multi-def register where the dominating def is in the loop pre-header and 283 // all other defs are in backedges. Later we will spot any WWM code inside 284 // that loop and then make the backedge defs partial defs so the liveness 285 // goes through the WWM code. 286 // Note that we are ignoring multi-preheader loops on the basis that the 287 // structurizer does not allow that for non-uniform loops. 288 // There must be a single use in the loop header. 289 if (!MRI->hasOneUse(Reg)) 290 return; 291 auto UseBlock = MRI->use_begin(Reg)->getParent()->getParent(); 292 auto Loop = LoopInfo->getLoopFor(UseBlock); 293 if (!Loop || Loop->getHeader() != UseBlock 294 || Loop->contains(Defs[0]->getParent())) { 295 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) 296 << " is multi-def but single use not in loop header\n"); 297 return; 298 } 299 for (unsigned I = 1; I != Defs.size(); ++I) { 300 if (!Loop->contains(Defs[I]->getParent())) 301 return; 302 } 303 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) 304 << " is a loop phi reg with loop header at " 305 << "bb." << Loop->getHeader()->getNumber() << "\n"); 306 LoopPhiDefs.push_back( 307 std::pair<MachineOperand *, MachineLoop *>(&DefOpnd, Loop)); 308 } 309 310 // Process a then phi def: It has two defs, one dominates the other, and there 311 // is a single use in a successor of the dominant def. Here we spot any WWM 312 // code inside the "then" clause and turn the second def into a partial def so 313 // its liveness goes through the WWM code in the "then" clause. 314 bool SIFixWWMLiveness::processThenDef(MachineOperand *DefOpnd) { 315 LLVM_DEBUG(dbgs() << "Processing then def: " << *DefOpnd->getParent()); 316 if (DefOpnd->getParent()->getOpcode() == TargetOpcode::IMPLICIT_DEF) { 317 // Ignore if dominating def is undef. 318 LLVM_DEBUG(dbgs() << " ignoring as dominating def is undef\n"); 319 return false; 320 } 321 unsigned Reg = DefOpnd->getReg(); 322 // Get the use block, which is the endif block. 323 auto UseBlock = MRI->use_instr_begin(Reg)->getParent(); 324 // Check whether there is WWM code inside the then branch. The WWM code must 325 // be dominated by the if but not dominated by the endif. 326 bool ContainsWWM = false; 327 for (auto WWM : WWMs) { 328 if (DomTree->dominates(DefOpnd->getParent()->getParent(), WWM->getParent()) 329 && !DomTree->dominates(UseBlock, WWM->getParent())) { 330 LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM); 331 ContainsWWM = true; 332 break; 333 } 334 } 335 if (!ContainsWWM) 336 return false; 337 // Get the other def. 338 MachineInstr *OtherDef = nullptr; 339 for (auto &MI : MRI->def_instructions(Reg)) { 340 if (&MI != DefOpnd->getParent()) 341 OtherDef = &MI; 342 } 343 // Make it a partial def. 344 OtherDef->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true)); 345 LLVM_DEBUG(dbgs() << *OtherDef); 346 return true; 347 } 348 349 // Process a loop exit def, that is, a register with a single use in a loop 350 // that has a use outside the loop. Here we spot any WWM code inside that loop 351 // and then make the def a partial def so its liveness goes round the loop and 352 // through the WWM code. 353 bool SIFixWWMLiveness::processLoopExitDef(MachineOperand *DefOpnd, 354 MachineLoop *Loop) { 355 LLVM_DEBUG(dbgs() << "Processing loop exit def: " << *DefOpnd->getParent()); 356 // Check whether there is WWM code inside the loop. 357 bool ContainsWWM = false; 358 for (auto WWM : WWMs) { 359 if (Loop->contains(WWM->getParent())) { 360 LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM); 361 ContainsWWM = true; 362 break; 363 } 364 } 365 if (!ContainsWWM) 366 return false; 367 unsigned Reg = DefOpnd->getReg(); 368 // Add a new implicit_def in loop preheader(s). 369 for (auto Pred : Loop->getHeader()->predecessors()) { 370 if (!Loop->contains(Pred)) { 371 auto ImplicitDef = BuildMI(*Pred, Pred->getFirstTerminator(), DebugLoc(), 372 TII->get(TargetOpcode::IMPLICIT_DEF), Reg); 373 LLVM_DEBUG(dbgs() << *ImplicitDef); 374 (void)ImplicitDef; 375 } 376 } 377 // Make the original def partial. 378 DefOpnd->getParent()->addOperand(MachineOperand::CreateReg( 379 Reg, false, /*isImp=*/true)); 380 LLVM_DEBUG(dbgs() << *DefOpnd->getParent()); 381 return true; 382 } 383 384 // Process a loop phi def, that is, a multi-def register where the dominating 385 // def is in the loop pre-header and all other defs are in backedges. Here we 386 // spot any WWM code inside that loop and then make the backedge defs partial 387 // defs so the liveness goes through the WWM code. 388 bool SIFixWWMLiveness::processLoopPhiDef(MachineOperand *DefOpnd, 389 MachineLoop *Loop) { 390 LLVM_DEBUG(dbgs() << "Processing loop phi def: " << *DefOpnd->getParent()); 391 // Check whether there is WWM code inside the loop. 392 bool ContainsWWM = false; 393 for (auto WWM : WWMs) { 394 if (Loop->contains(WWM->getParent())) { 395 LLVM_DEBUG(dbgs() << " contains WWM: " << *WWM); 396 ContainsWWM = true; 397 break; 398 } 399 } 400 if (!ContainsWWM) 401 return false; 402 unsigned Reg = DefOpnd->getReg(); 403 // Remove kill mark from uses. 404 for (auto &Use : MRI->use_operands(Reg)) 405 Use.setIsKill(false); 406 // Make all defs except the dominating one partial defs. 407 SmallVector<MachineInstr *, 4> Defs; 408 for (auto &Def : MRI->def_instructions(Reg)) 409 Defs.push_back(&Def); 410 for (auto Def : Defs) { 411 if (DefOpnd->getParent() == Def) 412 continue; 413 Def->addOperand(MachineOperand::CreateReg(Reg, false, /*isImp=*/true)); 414 LLVM_DEBUG(dbgs() << *Def); 415 } 416 return true; 417 } 418 419