1 //===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass tries to remove unnecessary VGPR live range in divergent if-else 11 /// structure. 12 /// 13 /// When we do structurization, we usually transform a if-else into two 14 /// sucessive if-then (with a flow block to do predicate inversion). Consider a 15 /// simple case after structurization: A divergent value %a was defined before 16 /// if-else and used in both THEN (use in THEN is optional) and ELSE part: 17 /// bb.if: 18 /// %a = ... 19 /// ... 20 /// bb.then: 21 /// ... = op %a 22 /// ... // %a can be dead here 23 /// bb.flow: 24 /// ... 25 /// bb.else: 26 /// ... = %a 27 /// ... 28 /// bb.endif 29 /// 30 /// As register allocator has no idea of the thread-control-flow, it will just 31 /// assume %a would be alive in the whole range of bb.then because of a later 32 /// use in bb.else. On AMDGPU architecture, the VGPR was accessed with respect 33 /// to exec mask. For this if-else case, the lanes active in bb.then will be 34 /// inactive in bb.else, and vice-verse. So we are safe to say that %a was dead 35 /// after the last use in bb.then untill the end of the block. The reason is 36 /// the instructions in bb.then will only overwrite lanes that will never be 37 /// accessed in bb.else. 38 /// 39 /// This pass aims to to tell register allocator that %a is in-fact dead, 40 /// through inserting a phi-node in bb.flow saying that %a is undef when coming 41 /// from bb.then, and then replace the uses in the bb.else with the result of 42 /// newly inserted phi. 43 /// 44 /// Two key conditions must be met to ensure correctness: 45 /// 1.) The def-point should be in the same loop-level as if-else-endif to make 46 /// sure the second loop iteration still get correct data. 47 /// 2.) There should be no further uses after the IF-ELSE region. 48 /// 49 // 50 //===----------------------------------------------------------------------===// 51 52 #include "AMDGPU.h" 53 #include "GCNSubtarget.h" 54 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 55 #include "SIMachineFunctionInfo.h" 56 #include "llvm/CodeGen/LiveVariables.h" 57 #include "llvm/CodeGen/MachineDominators.h" 58 #include "llvm/CodeGen/MachineLoopInfo.h" 59 #include "llvm/CodeGen/TargetRegisterInfo.h" 60 #include "llvm/InitializePasses.h" 61 62 using namespace llvm; 63 64 #define DEBUG_TYPE "si-opt-vgpr-liverange" 65 66 namespace { 67 68 class SIOptimizeVGPRLiveRange : public MachineFunctionPass { 69 private: 70 const SIRegisterInfo *TRI = nullptr; 71 const SIInstrInfo *TII = nullptr; 72 LiveVariables *LV = nullptr; 73 MachineDominatorTree *MDT = nullptr; 74 const MachineLoopInfo *Loops = nullptr; 75 MachineRegisterInfo *MRI = nullptr; 76 77 public: 78 static char ID; 79 80 MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const; 81 82 void collectElseRegionBlocks(MachineBasicBlock *Flow, 83 MachineBasicBlock *Endif, 84 SmallSetVector<MachineBasicBlock *, 16> &) const; 85 86 void 87 collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow, 88 MachineBasicBlock *Endif, 89 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, 90 SmallVectorImpl<Register> &CandidateRegs) const; 91 92 void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB, 93 SmallVectorImpl<MachineInstr *> &Uses) const; 94 95 void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If, 96 MachineBasicBlock *Flow) const; 97 98 void updateLiveRangeInElseRegion( 99 Register Reg, Register NewReg, MachineBasicBlock *Flow, 100 MachineBasicBlock *Endif, 101 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; 102 103 void 104 optimizeLiveRange(Register Reg, MachineBasicBlock *If, 105 MachineBasicBlock *Flow, MachineBasicBlock *Endif, 106 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; 107 108 SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {} 109 110 bool runOnMachineFunction(MachineFunction &MF) override; 111 112 StringRef getPassName() const override { 113 return "SI Optimize VGPR LiveRange"; 114 } 115 116 void getAnalysisUsage(AnalysisUsage &AU) const override { 117 AU.addRequired<LiveVariables>(); 118 AU.addRequired<MachineDominatorTree>(); 119 AU.addRequired<MachineLoopInfo>(); 120 AU.addPreserved<LiveVariables>(); 121 AU.addPreserved<MachineDominatorTree>(); 122 AU.addPreserved<MachineLoopInfo>(); 123 MachineFunctionPass::getAnalysisUsage(AU); 124 } 125 126 MachineFunctionProperties getRequiredProperties() const override { 127 return MachineFunctionProperties().set( 128 MachineFunctionProperties::Property::IsSSA); 129 } 130 }; 131 132 } // end anonymous namespace 133 134 // Check whether the MBB is a else flow block and get the branching target which 135 // is the Endif block 136 MachineBasicBlock * 137 SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const { 138 for (auto &BR : MBB->terminators()) { 139 if (BR.getOpcode() == AMDGPU::SI_ELSE) 140 return BR.getOperand(2).getMBB(); 141 } 142 return nullptr; 143 } 144 145 void SIOptimizeVGPRLiveRange::collectElseRegionBlocks( 146 MachineBasicBlock *Flow, MachineBasicBlock *Endif, 147 SmallSetVector<MachineBasicBlock *, 16> &Blocks) const { 148 assert(Flow != Endif); 149 150 MachineBasicBlock *MBB = Endif; 151 unsigned Cur = 0; 152 while (MBB) { 153 for (auto *Pred : MBB->predecessors()) { 154 if (Pred != Flow && !Blocks.contains(Pred)) 155 Blocks.insert(Pred); 156 } 157 158 if (Cur < Blocks.size()) 159 MBB = Blocks[Cur++]; 160 else 161 MBB = nullptr; 162 } 163 164 LLVM_DEBUG(dbgs() << "Found Else blocks: "); 165 for (auto *MBB : Blocks) 166 LLVM_DEBUG(dbgs() << printMBBReference(*MBB) << ' '); 167 LLVM_DEBUG(dbgs() << '\n'); 168 } 169 170 /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg. 171 void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock( 172 Register Reg, MachineBasicBlock *MBB, 173 SmallVectorImpl<MachineInstr *> &Uses) const { 174 for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) { 175 if (UseMI.getParent() == MBB && !UseMI.isPHI()) 176 Uses.push_back(&UseMI); 177 } 178 } 179 180 /// Collect the killed registers in the ELSE region which are not alive through 181 /// the whole THEN region. 182 void SIOptimizeVGPRLiveRange::collectCandidateRegisters( 183 MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif, 184 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, 185 SmallVectorImpl<Register> &CandidateRegs) const { 186 187 SmallSet<Register, 8> KillsInElse; 188 189 for (auto *Else : ElseBlocks) { 190 for (auto &MI : Else->instrs()) { 191 if (MI.isDebugInstr()) 192 continue; 193 194 for (auto &MO : MI.operands()) { 195 if (!MO.isReg() || !MO.getReg() || MO.isDef()) 196 continue; 197 198 Register MOReg = MO.getReg(); 199 // We can only optimize AGPR/VGPR virtual register 200 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg)) 201 continue; 202 203 if (MO.isKill() && MO.readsReg()) { 204 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg); 205 const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); 206 // Make sure two conditions are met: 207 // a.) the value is defined before/in the IF block 208 // b.) should be defined in the same loop-level. 209 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) && 210 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) 211 KillsInElse.insert(MOReg); 212 } 213 } 214 } 215 } 216 217 // Check the phis in the Endif, looking for value coming from the ELSE 218 // region. Make sure the phi-use is the last use. 219 for (auto &MI : Endif->phis()) { 220 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { 221 auto &MO = MI.getOperand(Idx); 222 auto *Pred = MI.getOperand(Idx + 1).getMBB(); 223 if (Pred == Flow) 224 continue; 225 assert(ElseBlocks.contains(Pred) && "Should be from Else region\n"); 226 227 if (!MO.isReg() || !MO.getReg() || MO.isUndef()) 228 continue; 229 230 Register Reg = MO.getReg(); 231 if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg)) 232 continue; 233 234 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); 235 236 if (VI.isLiveIn(*Endif, Reg, *MRI)) { 237 LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI) 238 << " as Live in Endif\n"); 239 continue; 240 } 241 // Make sure two conditions are met: 242 // a.) the value is defined before/in the IF block 243 // b.) should be defined in the same loop-level. 244 const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent(); 245 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) && 246 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) 247 KillsInElse.insert(Reg); 248 } 249 } 250 251 auto IsLiveThroughThen = [&](Register Reg) { 252 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; 253 ++I) { 254 if (!I->readsReg()) 255 continue; 256 auto *UseMI = I->getParent(); 257 auto *UseMBB = UseMI->getParent(); 258 if (UseMBB == Flow || UseMBB == Endif) { 259 if (!UseMI->isPHI()) 260 return true; 261 262 auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB(); 263 // The register is live through the path If->Flow or Flow->Endif. 264 // we should not optimize for such cases. 265 if ((UseMBB == Flow && IncomingMBB != If) || 266 (UseMBB == Endif && IncomingMBB == Flow)) 267 return true; 268 } 269 } 270 return false; 271 }; 272 273 for (auto Reg : KillsInElse) { 274 if (!IsLiveThroughThen(Reg)) 275 CandidateRegs.push_back(Reg); 276 } 277 } 278 279 // Re-calculate the liveness of \p Reg in the THEN-region 280 void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion( 281 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const { 282 283 SmallPtrSet<MachineBasicBlock *, 16> PHIIncoming; 284 285 MachineBasicBlock *ThenEntry = nullptr; 286 for (auto *Succ : If->successors()) { 287 if (Succ != Flow) { 288 ThenEntry = Succ; 289 break; 290 } 291 } 292 assert(ThenEntry && "No successor in Then region?"); 293 294 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 295 df_iterator_default_set<MachineBasicBlock *, 16> Visited; 296 297 for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) { 298 if (MBB == Flow) 299 break; 300 301 // Clear Live bit, as we will recalculate afterwards 302 LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB) 303 << '\n'); 304 OldVarInfo.AliveBlocks.reset(MBB->getNumber()); 305 } 306 307 // Get the blocks the Reg should be alive through 308 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; 309 ++I) { 310 auto *UseMI = I->getParent(); 311 if (UseMI->isPHI() && I->readsReg()) { 312 if (Visited.contains(UseMI->getParent())) 313 PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB()); 314 } 315 } 316 317 Visited.clear(); 318 319 for (MachineBasicBlock *MBB : depth_first_ext(ThenEntry, Visited)) { 320 if (MBB == Flow) 321 break; 322 323 SmallVector<MachineInstr *> Uses; 324 // PHI instructions has been processed before. 325 findNonPHIUsesInBlock(Reg, MBB, Uses); 326 327 if (Uses.size() == 1) { 328 LLVM_DEBUG(dbgs() << "Found one Non-PHI use in " 329 << printMBBReference(*MBB) << '\n'); 330 LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin())); 331 } else if (Uses.size() > 1) { 332 // Process the instructions in-order 333 LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in " 334 << printMBBReference(*MBB) << '\n'); 335 for (MachineInstr &MI : *MBB) { 336 if (llvm::is_contained(Uses, &MI)) 337 LV->HandleVirtRegUse(Reg, MBB, MI); 338 } 339 } 340 341 // Mark Reg alive through the block if this is a PHI incoming block 342 if (PHIIncoming.contains(MBB)) 343 LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(), 344 MBB); 345 } 346 347 // Set the isKilled flag if we get new Kills in the THEN region. 348 for (auto *MI : OldVarInfo.Kills) { 349 if (Visited.contains(MI->getParent())) 350 MI->addRegisterKilled(Reg, TRI); 351 } 352 } 353 354 void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion( 355 Register Reg, Register NewReg, MachineBasicBlock *Flow, 356 MachineBasicBlock *Endif, 357 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { 358 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); 359 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 360 361 // Transfer aliveBlocks from Reg to NewReg 362 for (auto *MBB : ElseBlocks) { 363 unsigned BBNum = MBB->getNumber(); 364 if (OldVarInfo.AliveBlocks.test(BBNum)) { 365 NewVarInfo.AliveBlocks.set(BBNum); 366 LLVM_DEBUG(dbgs() << "Removing ALiveBlock " << printMBBReference(*MBB) 367 << '\n'); 368 OldVarInfo.AliveBlocks.reset(BBNum); 369 } 370 } 371 372 // Transfer the possible Kills in ElseBlocks from Reg to NewReg 373 auto I = OldVarInfo.Kills.begin(); 374 while (I != OldVarInfo.Kills.end()) { 375 if (ElseBlocks.contains((*I)->getParent())) { 376 NewVarInfo.Kills.push_back(*I); 377 I = OldVarInfo.Kills.erase(I); 378 } else { 379 ++I; 380 } 381 } 382 } 383 384 void SIOptimizeVGPRLiveRange::optimizeLiveRange( 385 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow, 386 MachineBasicBlock *Endif, 387 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { 388 // Insert a new PHI, marking the value from the THEN region being 389 // undef. 390 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); 391 const auto *RC = MRI->getRegClass(Reg); 392 Register NewReg = MRI->createVirtualRegister(RC); 393 Register UndefReg = MRI->createVirtualRegister(RC); 394 MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(), 395 TII->get(TargetOpcode::PHI), NewReg); 396 for (auto *Pred : Flow->predecessors()) { 397 if (Pred == If) 398 PHI.addReg(Reg).addMBB(Pred); 399 else 400 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); 401 } 402 403 // Replace all uses in the ELSE region or the PHIs in ENDIF block 404 for (auto I = MRI->use_begin(Reg), E = MRI->use_end(); I != E;) { 405 MachineOperand &O = *I; 406 // This is a little bit tricky, the setReg() will update the linked list, 407 // so we have to increment the iterator before setReg() to avoid skipping 408 // some uses. 409 ++I; 410 auto *UseMI = O.getParent(); 411 auto *UseBlock = UseMI->getParent(); 412 // Replace uses in Endif block 413 if (UseBlock == Endif) { 414 assert(UseMI->isPHI() && "Uses should be PHI in Endif block"); 415 O.setReg(NewReg); 416 continue; 417 } 418 419 // Replace uses in Else region 420 if (ElseBlocks.contains(UseBlock)) 421 O.setReg(NewReg); 422 } 423 424 // The optimized Reg is not alive through Flow blocks anymore. 425 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 426 OldVarInfo.AliveBlocks.reset(Flow->getNumber()); 427 428 updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks); 429 updateLiveRangeInThenRegion(Reg, If, Flow); 430 } 431 432 char SIOptimizeVGPRLiveRange::ID = 0; 433 434 INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE, 435 "SI Optimize VGPR LiveRange", false, false) 436 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 437 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 438 INITIALIZE_PASS_DEPENDENCY(LiveVariables) 439 INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE, 440 "SI Optimize VGPR LiveRange", false, false) 441 442 char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID; 443 444 FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() { 445 return new SIOptimizeVGPRLiveRange(); 446 } 447 448 bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) { 449 450 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 451 TII = ST.getInstrInfo(); 452 TRI = &TII->getRegisterInfo(); 453 MDT = &getAnalysis<MachineDominatorTree>(); 454 Loops = &getAnalysis<MachineLoopInfo>(); 455 LV = &getAnalysis<LiveVariables>(); 456 MRI = &MF.getRegInfo(); 457 458 if (skipFunction(MF.getFunction())) 459 return false; 460 461 bool MadeChange = false; 462 463 // TODO: we need to think about the order of visiting the blocks to get 464 // optimal result for nesting if-else cases. 465 for (MachineBasicBlock &MBB : MF) { 466 for (auto &MI : MBB.terminators()) { 467 // Detect the if-else blocks 468 if (MI.getOpcode() == AMDGPU::SI_IF) { 469 MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB(); 470 auto *Endif = getElseTarget(IfTarget); 471 if (!Endif) 472 continue; 473 474 SmallSetVector<MachineBasicBlock *, 16> ElseBlocks; 475 SmallVector<Register> CandidateRegs; 476 477 LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: " 478 << printMBBReference(MBB) << ' ' 479 << printMBBReference(*IfTarget) << ' ' 480 << printMBBReference(*Endif) << '\n'); 481 482 // Collect all the blocks in the ELSE region 483 collectElseRegionBlocks(IfTarget, Endif, ElseBlocks); 484 485 // Collect the registers can be optimized 486 collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks, 487 CandidateRegs); 488 MadeChange |= !CandidateRegs.empty(); 489 // Now we are safe to optimize. 490 for (auto Reg : CandidateRegs) 491 optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks); 492 } 493 } 494 } 495 496 return MadeChange; 497 } 498