1 //===--------------------- SIOptimizeVGPRLiveRange.cpp -------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 /// \file 10 /// This pass tries to remove unnecessary VGPR live ranges in divergent if-else 11 /// structures and waterfall loops. 12 /// 13 /// When we do structurization, we usually transform an if-else into two 14 /// successive if-then (with a flow block to do predicate inversion). Consider a 15 /// simple case after structurization: A divergent value %a was defined before 16 /// if-else and used in both THEN (use in THEN is optional) and ELSE part: 17 /// bb.if: 18 /// %a = ... 19 /// ... 20 /// bb.then: 21 /// ... = op %a 22 /// ... // %a can be dead here 23 /// bb.flow: 24 /// ... 25 /// bb.else: 26 /// ... = %a 27 /// ... 28 /// bb.endif 29 /// 30 /// As register allocator has no idea of the thread-control-flow, it will just 31 /// assume %a would be alive in the whole range of bb.then because of a later 32 /// use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect 33 /// to exec mask. For this if-else case, the lanes active in bb.then will be 34 /// inactive in bb.else, and vice-versa. So we are safe to say that %a was dead 35 /// after the last use in bb.then until the end of the block. The reason is 36 /// the instructions in bb.then will only overwrite lanes that will never be 37 /// accessed in bb.else. 38 /// 39 /// This pass aims to to tell register allocator that %a is in-fact dead, 40 /// through inserting a phi-node in bb.flow saying that %a is undef when coming 41 /// from bb.then, and then replace the uses in the bb.else with the result of 42 /// newly inserted phi. 43 /// 44 /// Two key conditions must be met to ensure correctness: 45 /// 1.) The def-point should be in the same loop-level as if-else-endif to make 46 /// sure the second loop iteration still get correct data. 47 /// 2.) There should be no further uses after the IF-ELSE region. 48 /// 49 /// 50 /// Waterfall loops get inserted around instructions that use divergent values 51 /// but can only be executed with a uniform value. For example an indirect call 52 /// to a divergent address: 53 /// bb.start: 54 /// %a = ... 55 /// %fun = ... 56 /// ... 57 /// bb.loop: 58 /// call %fun (%a) 59 /// ... // %a can be dead here 60 /// loop %bb.loop 61 /// 62 /// The loop block is executed multiple times, but it is run exactly once for 63 /// each active lane. Similar to the if-else case, the register allocator 64 /// assumes that %a is live throughout the loop as it is used again in the next 65 /// iteration. If %a is a VGPR that is unused after the loop, it does not need 66 /// to be live after its last use in the loop block. By inserting a phi-node at 67 /// the start of bb.loop that is undef when coming from bb.loop, the register 68 /// allocation knows that the value of %a does not need to be preserved through 69 /// iterations of the loop. 70 /// 71 // 72 //===----------------------------------------------------------------------===// 73 74 #include "AMDGPU.h" 75 #include "GCNSubtarget.h" 76 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 77 #include "SIMachineFunctionInfo.h" 78 #include "llvm/CodeGen/LiveVariables.h" 79 #include "llvm/CodeGen/MachineDominators.h" 80 #include "llvm/CodeGen/MachineLoopInfo.h" 81 #include "llvm/CodeGen/TargetRegisterInfo.h" 82 #include "llvm/InitializePasses.h" 83 84 using namespace llvm; 85 86 #define DEBUG_TYPE "si-opt-vgpr-liverange" 87 88 namespace { 89 90 class SIOptimizeVGPRLiveRange : public MachineFunctionPass { 91 private: 92 const SIRegisterInfo *TRI = nullptr; 93 const SIInstrInfo *TII = nullptr; 94 LiveVariables *LV = nullptr; 95 MachineDominatorTree *MDT = nullptr; 96 const MachineLoopInfo *Loops = nullptr; 97 MachineRegisterInfo *MRI = nullptr; 98 99 public: 100 static char ID; 101 102 MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const; 103 104 void collectElseRegionBlocks(MachineBasicBlock *Flow, 105 MachineBasicBlock *Endif, 106 SmallSetVector<MachineBasicBlock *, 16> &) const; 107 108 void 109 collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow, 110 MachineBasicBlock *Endif, 111 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, 112 SmallVectorImpl<Register> &CandidateRegs) const; 113 114 void collectWaterfallCandidateRegisters( 115 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd, 116 SmallSetVector<Register, 16> &CandidateRegs, 117 SmallSetVector<MachineBasicBlock *, 2> &Blocks, 118 SmallVectorImpl<MachineInstr *> &Instructions) const; 119 120 void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB, 121 SmallVectorImpl<MachineInstr *> &Uses) const; 122 123 void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If, 124 MachineBasicBlock *Flow) const; 125 126 void updateLiveRangeInElseRegion( 127 Register Reg, Register NewReg, MachineBasicBlock *Flow, 128 MachineBasicBlock *Endif, 129 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; 130 131 void 132 optimizeLiveRange(Register Reg, MachineBasicBlock *If, 133 MachineBasicBlock *Flow, MachineBasicBlock *Endif, 134 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const; 135 136 void optimizeWaterfallLiveRange( 137 Register Reg, MachineBasicBlock *LoopHeader, 138 SmallSetVector<MachineBasicBlock *, 2> &LoopBlocks, 139 SmallVectorImpl<MachineInstr *> &Instructions) const; 140 141 SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {} 142 143 bool runOnMachineFunction(MachineFunction &MF) override; 144 145 StringRef getPassName() const override { 146 return "SI Optimize VGPR LiveRange"; 147 } 148 149 void getAnalysisUsage(AnalysisUsage &AU) const override { 150 AU.addRequired<LiveVariables>(); 151 AU.addRequired<MachineDominatorTree>(); 152 AU.addRequired<MachineLoopInfo>(); 153 AU.addPreserved<LiveVariables>(); 154 AU.addPreserved<MachineDominatorTree>(); 155 AU.addPreserved<MachineLoopInfo>(); 156 MachineFunctionPass::getAnalysisUsage(AU); 157 } 158 159 MachineFunctionProperties getRequiredProperties() const override { 160 return MachineFunctionProperties().set( 161 MachineFunctionProperties::Property::IsSSA); 162 } 163 164 MachineFunctionProperties getClearedProperties() const override { 165 return MachineFunctionProperties().set( 166 MachineFunctionProperties::Property::NoPHIs); 167 } 168 }; 169 170 } // end anonymous namespace 171 172 // Check whether the MBB is a else flow block and get the branching target which 173 // is the Endif block 174 MachineBasicBlock * 175 SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const { 176 for (auto &BR : MBB->terminators()) { 177 if (BR.getOpcode() == AMDGPU::SI_ELSE) 178 return BR.getOperand(2).getMBB(); 179 } 180 return nullptr; 181 } 182 183 void SIOptimizeVGPRLiveRange::collectElseRegionBlocks( 184 MachineBasicBlock *Flow, MachineBasicBlock *Endif, 185 SmallSetVector<MachineBasicBlock *, 16> &Blocks) const { 186 assert(Flow != Endif); 187 188 MachineBasicBlock *MBB = Endif; 189 unsigned Cur = 0; 190 while (MBB) { 191 for (auto *Pred : MBB->predecessors()) { 192 if (Pred != Flow && !Blocks.contains(Pred)) 193 Blocks.insert(Pred); 194 } 195 196 if (Cur < Blocks.size()) 197 MBB = Blocks[Cur++]; 198 else 199 MBB = nullptr; 200 } 201 202 LLVM_DEBUG({ 203 dbgs() << "Found Else blocks: "; 204 for (auto *MBB : Blocks) 205 dbgs() << printMBBReference(*MBB) << ' '; 206 dbgs() << '\n'; 207 }); 208 } 209 210 /// Find the instructions(excluding phi) in \p MBB that uses the \p Reg. 211 void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock( 212 Register Reg, MachineBasicBlock *MBB, 213 SmallVectorImpl<MachineInstr *> &Uses) const { 214 for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) { 215 if (UseMI.getParent() == MBB && !UseMI.isPHI()) 216 Uses.push_back(&UseMI); 217 } 218 } 219 220 /// Collect the killed registers in the ELSE region which are not alive through 221 /// the whole THEN region. 222 void SIOptimizeVGPRLiveRange::collectCandidateRegisters( 223 MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif, 224 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks, 225 SmallVectorImpl<Register> &CandidateRegs) const { 226 227 SmallSet<Register, 8> KillsInElse; 228 229 for (auto *Else : ElseBlocks) { 230 for (auto &MI : Else->instrs()) { 231 if (MI.isDebugInstr()) 232 continue; 233 234 for (auto &MO : MI.operands()) { 235 if (!MO.isReg() || !MO.getReg() || MO.isDef()) 236 continue; 237 238 Register MOReg = MO.getReg(); 239 // We can only optimize AGPR/VGPR virtual register 240 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg)) 241 continue; 242 243 if (MO.readsReg()) { 244 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg); 245 const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); 246 // Make sure two conditions are met: 247 // a.) the value is defined before/in the IF block 248 // b.) should be defined in the same loop-level. 249 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) && 250 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) { 251 // Check if the register is live into the endif block. If not, 252 // consider it killed in the else region. 253 LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg); 254 if (!VI.isLiveIn(*Endif, MOReg, *MRI)) { 255 KillsInElse.insert(MOReg); 256 } else { 257 LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI) 258 << " as Live in Endif\n"); 259 } 260 } 261 } 262 } 263 } 264 } 265 266 // Check the phis in the Endif, looking for value coming from the ELSE 267 // region. Make sure the phi-use is the last use. 268 for (auto &MI : Endif->phis()) { 269 for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) { 270 auto &MO = MI.getOperand(Idx); 271 auto *Pred = MI.getOperand(Idx + 1).getMBB(); 272 if (Pred == Flow) 273 continue; 274 assert(ElseBlocks.contains(Pred) && "Should be from Else region\n"); 275 276 if (!MO.isReg() || !MO.getReg() || MO.isUndef()) 277 continue; 278 279 Register Reg = MO.getReg(); 280 if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg)) 281 continue; 282 283 LiveVariables::VarInfo &VI = LV->getVarInfo(Reg); 284 285 if (VI.isLiveIn(*Endif, Reg, *MRI)) { 286 LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI) 287 << " as Live in Endif\n"); 288 continue; 289 } 290 // Make sure two conditions are met: 291 // a.) the value is defined before/in the IF block 292 // b.) should be defined in the same loop-level. 293 const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent(); 294 if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) && 295 Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) 296 KillsInElse.insert(Reg); 297 } 298 } 299 300 auto IsLiveThroughThen = [&](Register Reg) { 301 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; 302 ++I) { 303 if (!I->readsReg()) 304 continue; 305 auto *UseMI = I->getParent(); 306 auto *UseMBB = UseMI->getParent(); 307 if (UseMBB == Flow || UseMBB == Endif) { 308 if (!UseMI->isPHI()) 309 return true; 310 311 auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB(); 312 // The register is live through the path If->Flow or Flow->Endif. 313 // we should not optimize for such cases. 314 if ((UseMBB == Flow && IncomingMBB != If) || 315 (UseMBB == Endif && IncomingMBB == Flow)) 316 return true; 317 } 318 } 319 return false; 320 }; 321 322 for (auto Reg : KillsInElse) { 323 if (!IsLiveThroughThen(Reg)) 324 CandidateRegs.push_back(Reg); 325 } 326 } 327 328 /// Collect the registers used in the waterfall loop block that are defined 329 /// before. 330 void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters( 331 MachineBasicBlock *LoopHeader, MachineBasicBlock *LoopEnd, 332 SmallSetVector<Register, 16> &CandidateRegs, 333 SmallSetVector<MachineBasicBlock *, 2> &Blocks, 334 SmallVectorImpl<MachineInstr *> &Instructions) const { 335 336 // Collect loop instructions, potentially spanning multiple blocks 337 auto *MBB = LoopHeader; 338 for (;;) { 339 Blocks.insert(MBB); 340 for (auto &MI : *MBB) { 341 if (MI.isDebugInstr()) 342 continue; 343 Instructions.push_back(&MI); 344 } 345 if (MBB == LoopEnd) 346 break; 347 assert(MBB->pred_size() == 1 || 348 (MBB == LoopHeader && MBB->pred_size() == 2)); 349 assert(MBB->succ_size() == 1); 350 MBB = *MBB->succ_begin(); 351 } 352 353 for (auto *I : Instructions) { 354 auto &MI = *I; 355 356 for (auto &MO : MI.operands()) { 357 if (!MO.isReg() || !MO.getReg() || MO.isDef()) 358 continue; 359 360 Register MOReg = MO.getReg(); 361 // We can only optimize AGPR/VGPR virtual register 362 if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg)) 363 continue; 364 365 if (MO.readsReg()) { 366 MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent(); 367 // Make sure the value is defined before the LOOP block 368 if (!Blocks.contains(DefMBB) && !CandidateRegs.contains(MOReg)) { 369 // If the variable is used after the loop, the register coalescer will 370 // merge the newly created register and remove the phi node again. 371 // Just do nothing in that case. 372 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg); 373 bool IsUsed = false; 374 for (auto *Succ : LoopEnd->successors()) { 375 if (!Blocks.contains(Succ) && 376 OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) { 377 IsUsed = true; 378 break; 379 } 380 } 381 if (!IsUsed) { 382 LLVM_DEBUG(dbgs() << "Found candidate reg: " 383 << printReg(MOReg, TRI, 0, MRI) << '\n'); 384 CandidateRegs.insert(MOReg); 385 } else { 386 LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: " 387 << printReg(MOReg, TRI, 0, MRI) << '\n'); 388 } 389 } 390 } 391 } 392 } 393 } 394 395 // Re-calculate the liveness of \p Reg in the THEN-region 396 void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion( 397 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const { 398 SetVector<MachineBasicBlock *> Blocks; 399 SmallVector<MachineBasicBlock *> WorkList({If}); 400 401 // Collect all successors until we see the flow block, where we should 402 // reconverge. 403 while (!WorkList.empty()) { 404 auto *MBB = WorkList.pop_back_val(); 405 for (auto *Succ : MBB->successors()) { 406 if (Succ != Flow && !Blocks.contains(Succ)) { 407 WorkList.push_back(Succ); 408 Blocks.insert(Succ); 409 } 410 } 411 } 412 413 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 414 for (MachineBasicBlock *MBB : Blocks) { 415 // Clear Live bit, as we will recalculate afterwards 416 LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB) 417 << '\n'); 418 OldVarInfo.AliveBlocks.reset(MBB->getNumber()); 419 } 420 421 SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming; 422 423 // Get the blocks the Reg should be alive through 424 for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E; 425 ++I) { 426 auto *UseMI = I->getParent(); 427 if (UseMI->isPHI() && I->readsReg()) { 428 if (Blocks.contains(UseMI->getParent())) 429 PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB()); 430 } 431 } 432 433 for (MachineBasicBlock *MBB : Blocks) { 434 SmallVector<MachineInstr *> Uses; 435 // PHI instructions has been processed before. 436 findNonPHIUsesInBlock(Reg, MBB, Uses); 437 438 if (Uses.size() == 1) { 439 LLVM_DEBUG(dbgs() << "Found one Non-PHI use in " 440 << printMBBReference(*MBB) << '\n'); 441 LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin())); 442 } else if (Uses.size() > 1) { 443 // Process the instructions in-order 444 LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in " 445 << printMBBReference(*MBB) << '\n'); 446 for (MachineInstr &MI : *MBB) { 447 if (llvm::is_contained(Uses, &MI)) 448 LV->HandleVirtRegUse(Reg, MBB, MI); 449 } 450 } 451 452 // Mark Reg alive through the block if this is a PHI incoming block 453 if (PHIIncoming.contains(MBB)) 454 LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(), 455 MBB); 456 } 457 458 // Set the isKilled flag if we get new Kills in the THEN region. 459 for (auto *MI : OldVarInfo.Kills) { 460 if (Blocks.contains(MI->getParent())) 461 MI->addRegisterKilled(Reg, TRI); 462 } 463 } 464 465 void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion( 466 Register Reg, Register NewReg, MachineBasicBlock *Flow, 467 MachineBasicBlock *Endif, 468 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { 469 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); 470 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 471 472 // Transfer aliveBlocks from Reg to NewReg 473 for (auto *MBB : ElseBlocks) { 474 unsigned BBNum = MBB->getNumber(); 475 if (OldVarInfo.AliveBlocks.test(BBNum)) { 476 NewVarInfo.AliveBlocks.set(BBNum); 477 LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB) 478 << '\n'); 479 OldVarInfo.AliveBlocks.reset(BBNum); 480 } 481 } 482 483 // Transfer the possible Kills in ElseBlocks from Reg to NewReg 484 auto I = OldVarInfo.Kills.begin(); 485 while (I != OldVarInfo.Kills.end()) { 486 if (ElseBlocks.contains((*I)->getParent())) { 487 NewVarInfo.Kills.push_back(*I); 488 I = OldVarInfo.Kills.erase(I); 489 } else { 490 ++I; 491 } 492 } 493 } 494 495 void SIOptimizeVGPRLiveRange::optimizeLiveRange( 496 Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow, 497 MachineBasicBlock *Endif, 498 SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const { 499 // Insert a new PHI, marking the value from the THEN region being 500 // undef. 501 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); 502 const auto *RC = MRI->getRegClass(Reg); 503 Register NewReg = MRI->createVirtualRegister(RC); 504 Register UndefReg = MRI->createVirtualRegister(RC); 505 MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(), 506 TII->get(TargetOpcode::PHI), NewReg); 507 for (auto *Pred : Flow->predecessors()) { 508 if (Pred == If) 509 PHI.addReg(Reg).addMBB(Pred); 510 else 511 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); 512 } 513 514 // Replace all uses in the ELSE region or the PHIs in ENDIF block 515 // Use early increment range because setReg() will update the linked list. 516 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) { 517 auto *UseMI = O.getParent(); 518 auto *UseBlock = UseMI->getParent(); 519 // Replace uses in Endif block 520 if (UseBlock == Endif) { 521 assert(UseMI->isPHI() && "Uses should be PHI in Endif block"); 522 O.setReg(NewReg); 523 continue; 524 } 525 526 // Replace uses in Else region 527 if (ElseBlocks.contains(UseBlock)) 528 O.setReg(NewReg); 529 } 530 531 // The optimized Reg is not alive through Flow blocks anymore. 532 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 533 OldVarInfo.AliveBlocks.reset(Flow->getNumber()); 534 535 updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks); 536 updateLiveRangeInThenRegion(Reg, If, Flow); 537 } 538 539 void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange( 540 Register Reg, MachineBasicBlock *LoopHeader, 541 SmallSetVector<MachineBasicBlock *, 2> &Blocks, 542 SmallVectorImpl<MachineInstr *> &Instructions) const { 543 // Insert a new PHI, marking the value from the last loop iteration undef. 544 LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n'); 545 const auto *RC = MRI->getRegClass(Reg); 546 Register NewReg = MRI->createVirtualRegister(RC); 547 Register UndefReg = MRI->createVirtualRegister(RC); 548 549 // Replace all uses in the LOOP region 550 // Use early increment range because setReg() will update the linked list. 551 for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) { 552 auto *UseMI = O.getParent(); 553 auto *UseBlock = UseMI->getParent(); 554 // Replace uses in Loop blocks 555 if (Blocks.contains(UseBlock)) 556 O.setReg(NewReg); 557 } 558 559 MachineInstrBuilder PHI = 560 BuildMI(*LoopHeader, LoopHeader->getFirstNonPHI(), DebugLoc(), 561 TII->get(TargetOpcode::PHI), NewReg); 562 for (auto *Pred : LoopHeader->predecessors()) { 563 if (Blocks.contains(Pred)) 564 PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred); 565 else 566 PHI.addReg(Reg).addMBB(Pred); 567 } 568 569 LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg); 570 LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg); 571 572 // Find last use and mark as kill 573 MachineInstr *Kill = nullptr; 574 for (auto *MI : reverse(Instructions)) { 575 if (MI->readsRegister(NewReg, TRI)) { 576 MI->addRegisterKilled(NewReg, TRI); 577 NewVarInfo.Kills.push_back(MI); 578 Kill = MI; 579 break; 580 } 581 } 582 assert(Kill && "Failed to find last usage of register in loop"); 583 584 MachineBasicBlock *KillBlock = Kill->getParent(); 585 bool PostKillBlock = false; 586 for (auto *Block : Blocks) { 587 auto BBNum = Block->getNumber(); 588 589 // collectWaterfallCandidateRegisters only collects registers that are dead 590 // after the loop. So we know that the old reg is no longer live throughout 591 // the waterfall loop. 592 OldVarInfo.AliveBlocks.reset(BBNum); 593 594 // The new register is live up to (and including) the block that kills it. 595 PostKillBlock |= (Block == KillBlock); 596 if (PostKillBlock) { 597 NewVarInfo.AliveBlocks.reset(BBNum); 598 } else if (Block != LoopHeader) { 599 NewVarInfo.AliveBlocks.set(BBNum); 600 } 601 } 602 } 603 604 char SIOptimizeVGPRLiveRange::ID = 0; 605 606 INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE, 607 "SI Optimize VGPR LiveRange", false, false) 608 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 609 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 610 INITIALIZE_PASS_DEPENDENCY(LiveVariables) 611 INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE, 612 "SI Optimize VGPR LiveRange", false, false) 613 614 char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID; 615 616 FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() { 617 return new SIOptimizeVGPRLiveRange(); 618 } 619 620 bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) { 621 622 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 623 TII = ST.getInstrInfo(); 624 TRI = &TII->getRegisterInfo(); 625 MDT = &getAnalysis<MachineDominatorTree>(); 626 Loops = &getAnalysis<MachineLoopInfo>(); 627 LV = &getAnalysis<LiveVariables>(); 628 MRI = &MF.getRegInfo(); 629 630 if (skipFunction(MF.getFunction())) 631 return false; 632 633 bool MadeChange = false; 634 635 // TODO: we need to think about the order of visiting the blocks to get 636 // optimal result for nesting if-else cases. 637 for (MachineBasicBlock &MBB : MF) { 638 for (auto &MI : MBB.terminators()) { 639 // Detect the if-else blocks 640 if (MI.getOpcode() == AMDGPU::SI_IF) { 641 MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB(); 642 auto *Endif = getElseTarget(IfTarget); 643 if (!Endif) 644 continue; 645 646 SmallSetVector<MachineBasicBlock *, 16> ElseBlocks; 647 SmallVector<Register> CandidateRegs; 648 649 LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: " 650 << printMBBReference(MBB) << ' ' 651 << printMBBReference(*IfTarget) << ' ' 652 << printMBBReference(*Endif) << '\n'); 653 654 // Collect all the blocks in the ELSE region 655 collectElseRegionBlocks(IfTarget, Endif, ElseBlocks); 656 657 // Collect the registers can be optimized 658 collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks, 659 CandidateRegs); 660 MadeChange |= !CandidateRegs.empty(); 661 // Now we are safe to optimize. 662 for (auto Reg : CandidateRegs) 663 optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks); 664 } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) { 665 auto *LoopHeader = MI.getOperand(0).getMBB(); 666 auto *LoopEnd = &MBB; 667 668 LLVM_DEBUG(dbgs() << "Checking Waterfall loop: " 669 << printMBBReference(*LoopHeader) << '\n'); 670 671 SmallSetVector<Register, 16> CandidateRegs; 672 SmallVector<MachineInstr *, 16> Instructions; 673 SmallSetVector<MachineBasicBlock *, 2> Blocks; 674 675 collectWaterfallCandidateRegisters(LoopHeader, LoopEnd, CandidateRegs, 676 Blocks, Instructions); 677 MadeChange |= !CandidateRegs.empty(); 678 // Now we are safe to optimize. 679 for (auto Reg : CandidateRegs) 680 optimizeWaterfallLiveRange(Reg, LoopHeader, Blocks, Instructions); 681 } 682 } 683 } 684 685 return MadeChange; 686 } 687