//===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their address on the fly; it also sets the STACK_SIZE info.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <set>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "r600cf"

namespace {

struct CFStack {
  enum StackItem {
    ENTRY = 0,
    SUB_ENTRY = 1,
    FIRST_NON_WQM_PUSH = 2,
    FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
  };

  const R600Subtarget *ST;
  std::vector<StackItem> BranchStack;
  std::vector<StackItem> LoopStack;
  unsigned MaxStackSize;
  unsigned CurrentEntries = 0;
  unsigned CurrentSubEntries = 0;

  CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
      // We need to reserve a stack entry for CALL_FS in vertex shaders.
      MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}

  unsigned getLoopDepth();
  bool branchStackContains(CFStack::StackItem);
  bool requiresWorkAroundForInst(unsigned Opcode);
  unsigned getSubEntrySize(CFStack::StackItem Item);
  void updateMaxStackSize();
  void pushBranch(unsigned Opcode, bool isWQM = false);
  void pushLoop();
  void popBranch();
  void popLoop();
};

unsigned CFStack::getLoopDepth() {
  return LoopStack.size();
}

bool CFStack::branchStackContains(CFStack::StackItem Item) {
  return llvm::is_contained(BranchStack, Item);
}

bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
  if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
      getLoopDepth() > 1)
    return true;

  if (!ST->hasCFAluBug())
    return false;

  switch (Opcode) {
  default: return false;
  case R600::CF_ALU_PUSH_BEFORE:
  case R600::CF_ALU_ELSE_AFTER:
  case R600::CF_ALU_BREAK:
  case R600::CF_ALU_CONTINUE:
    if (CurrentSubEntries == 0)
      return false;
    if (ST->getWavefrontSize() == 64) {
      // We are being conservative here.  We only require this work-around if
      // CurrentSubEntries > 3 &&
      // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
      //
      // We have to be conservative, because we don't know for certain that
      // our stack allocation algorithm for Evergreen/NI is correct.  Applying
      // this work-around when CurrentSubEntries > 3 allows us to over-allocate
      // stack resources without any problems.
      return CurrentSubEntries > 3;
    } else {
      assert(ST->getWavefrontSize() == 32);
      // We are being conservative here.  We only require the work-around if
      // CurrentSubEntries > 7 &&
      // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
      // See the comment on the wavefront size == 64 case for why we are
      // being conservative.
      return CurrentSubEntries > 7;
    }
  }
}

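// Returns the number of stack sub-entries the given item occupies.  Full
// ENTRY items are tracked separately in CurrentEntries; sub-entries are
// packed four to a hardware stack entry (see updateMaxStackSize()).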
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
  switch (Item) {
  default:
    return 0;
  case CFStack::FIRST_NON_WQM_PUSH:
    assert(!ST->hasCaymanISA());
    if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
      // +1 For the push operation.
      // +2 Extra space required.
      return 3;
    } else {
      // Some documentation says that this is not necessary on Evergreen,
      // but experimentation has shown that we need to allocate 1 extra
      // sub-entry for the first non-WQM push.
      // +1 For the push operation.
      // +1 Extra space required.
      return 2;
    }
  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
    assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    // +1 For the push operation.
    // +1 Extra space required.
    return 2;
  case CFStack::SUB_ENTRY:
    return 1;
  }
}

void CFStack::updateMaxStackSize() {
  unsigned CurrentStackSize = CurrentEntries + divideCeil(CurrentSubEntries, 4);
  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
}

void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
  CFStack::StackItem Item = CFStack::ENTRY;
  switch (Opcode) {
  case R600::CF_PUSH_EG:
  case R600::CF_ALU_PUSH_BEFORE:
    if (!isWQM) {
      if (!ST->hasCaymanISA() &&
          !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
        // May not be required on Evergreen/NI; see the comment in
        // CFStack::getSubEntrySize().
        Item = CFStack::FIRST_NON_WQM_PUSH;
      else if (CurrentEntries > 0 &&
               ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
               !ST->hasCaymanISA() &&
               !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
        Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
      else
        Item = CFStack::SUB_ENTRY;
    } else
      Item = CFStack::ENTRY;
    break;
  }
  BranchStack.push_back(Item);
  if (Item == CFStack::ENTRY)
    CurrentEntries++;
  else
    CurrentSubEntries += getSubEntrySize(Item);
  updateMaxStackSize();
}

void CFStack::pushLoop() {
  LoopStack.push_back(CFStack::ENTRY);
  CurrentEntries++;
  updateMaxStackSize();
}

void CFStack::popBranch() {
  CFStack::StackItem Top = BranchStack.back();
  if (Top == CFStack::ENTRY)
    CurrentEntries--;
  else
    CurrentSubEntries -= getSubEntrySize(Top);
  BranchStack.pop_back();
}

void CFStack::popLoop() {
  CurrentEntries--;
  LoopStack.pop_back();
}

class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
  using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;

  enum ControlFlowInstruction {
    CF_TC,
    CF_VC,
    CF_CALL_FS,
    CF_WHILE_LOOP,
    CF_END_LOOP,
    CF_LOOP_BREAK,
    CF_LOOP_CONTINUE,
    CF_JUMP,
    CF_ELSE,
    CF_POP,
    CF_END
  };

  const R600InstrInfo *TII = nullptr;
  const R600RegisterInfo *TRI = nullptr;
  unsigned MaxFetchInst;
  const R600Subtarget *ST = nullptr;

  bool IsTrivialInst(MachineInstr &MI) const {
    switch (MI.getOpcode()) {
    case R600::KILL:
    case R600::RETURN:
      return true;
    default:
      return false;
    }
  }

  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
    unsigned Opcode = 0;
    bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    switch (CFI) {
    case CF_TC:
      Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
      break;
    case CF_VC:
      Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
      break;
    case CF_CALL_FS:
      Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
      break;
    case CF_WHILE_LOOP:
      Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
      break;
    case CF_END_LOOP:
      Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
      break;
    case CF_LOOP_BREAK:
      Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
      break;
    case CF_LOOP_CONTINUE:
      Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
      break;
    case CF_JUMP:
      Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
      break;
    case CF_ELSE:
      Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
      break;
    case CF_POP:
      Opcode = isEg ? R600::POP_EG : R600::POP_R600;
      break;
    case CF_END:
      if (ST->hasCaymanISA()) {
        Opcode = R600::CF_END_CM;
        break;
      }
      Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
      break;
    }
    assert(Opcode && "No opcode selected");
    return TII->get(Opcode);
  }

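  // Clause-compatibility check used when forming fetch clauses: operands are
  // widened to their R600_Reg128 super-registers, and an instruction is
  // rejected if its (widened) source register matches a destination already
  // written earlier in the clause.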
  bool isCompatibleWithClause(const MachineInstr &MI,
                              std::set<unsigned> &DstRegs) const {
    unsigned DstMI, SrcMI;
    for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
                                          E = MI.operands_end();
         I != E; ++I) {
      const MachineOperand &MO = *I;
      if (!MO.isReg())
        continue;
      if (MO.isDef()) {
        Register Reg = MO.getReg();
        if (R600::R600_Reg128RegClass.contains(Reg))
          DstMI = Reg;
        else
          DstMI = TRI->getMatchingSuperReg(Reg,
              R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &R600::R600_Reg128RegClass);
      }
      if (MO.isUse()) {
        Register Reg = MO.getReg();
        if (R600::R600_Reg128RegClass.contains(Reg))
          SrcMI = Reg;
        else
          SrcMI = TRI->getMatchingSuperReg(Reg,
              R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &R600::R600_Reg128RegClass);
      }
    }
    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
      DstRegs.insert(DstMI);
      return true;
    } else
      return false;
  }

  // Greedily groups consecutive texture or vertex fetch instructions (up to
  // MaxFetchInst) into a clause headed by a CF_TC/CF_VC instruction.
  ClauseFile
  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineBasicBlock::iterator ClauseHead = I;
    std::vector<MachineInstr *> ClauseContent;
    unsigned AluInstCount = 0;
    bool IsTex = TII->usesTextureCache(*ClauseHead);
    std::set<unsigned> DstRegs;
    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
      if (IsTrivialInst(*I))
        continue;
      if (AluInstCount >= MaxFetchInst)
        break;
      if ((IsTex && !TII->usesTextureCache(*I)) ||
          (!IsTex && !TII->usesVertexCache(*I)))
        break;
      if (!isCompatibleWithClause(*I, DstRegs))
        break;
      AluInstCount++;
      ClauseContent.push_back(&*I);
    }
    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
                                getHWInstrDesc(IsTex ? CF_TC : CF_VC))
                            .addImm(0)                 // ADDR
                            .addImm(AluInstCount - 1); // COUNT
    return ClauseFile(MIb, std::move(ClauseContent));
  }

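  // Rewrites every ALU_LITERAL_X source of MI to one of the four literal
  // channels (X/Y/Z/W).  Identical immediates share a channel; otherwise a
  // new channel is allocated.  An instruction group can hold at most four
  // literals.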
  void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
    static const unsigned LiteralRegs[] = {
      R600::ALU_LITERAL_X,
      R600::ALU_LITERAL_Y,
      R600::ALU_LITERAL_Z,
      R600::ALU_LITERAL_W
    };
    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
        TII->getSrcs(MI);
    for (const auto &Src : Srcs) {
      if (Src.first->getReg() != R600::ALU_LITERAL_X)
        continue;
      int64_t Imm = Src.second;
      std::vector<MachineOperand *>::iterator It =
          llvm::find_if(Lits, [&](MachineOperand *val) {
            return val->isImm() && (val->getImm() == Imm);
          });

      // Get corresponding Operand
      MachineOperand &Operand = MI.getOperand(
          TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));

      if (It != Lits.end()) {
        // Reuse existing literal reg
        unsigned Index = It - Lits.begin();
        Src.first->setReg(LiteralRegs[Index]);
      } else {
        // Allocate new literal reg
        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
        Src.first->setReg(LiteralRegs[Lits.size()]);
        Lits.push_back(&Operand);
      }
    }
  }

  MachineBasicBlock::iterator insertLiterals(
      MachineBasicBlock::iterator InsertPos,
      const std::vector<unsigned> &Literals) const {
    MachineBasicBlock *MBB = InsertPos->getParent();
    for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
      unsigned LiteralPair0 = Literals[i];
      unsigned LiteralPair1 = (i + 1 < e) ? Literals[i + 1] : 0;
      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
                          TII->get(R600::LITERALS))
                      .addImm(LiteralPair0)
                      .addImm(LiteralPair1);
    }
    return InsertPos;
  }

  // Collects the ALU instructions (unbundling instruction groups) that follow
  // the clause head, rewrites their literal operands, and emits LITERALS
  // pseudo instructions carrying the immediates after each group.
  ClauseFile
  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineInstr &ClauseHead = *I;
    std::vector<MachineInstr *> ClauseContent;
    I++;
    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
      if (IsTrivialInst(*I)) {
        ++I;
        continue;
      }
      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
        break;
      std::vector<MachineOperand *> Literals;
      if (I->isBundle()) {
        MachineInstr &DeleteMI = *I;
        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
        while (++BI != E && BI->isBundledWithPred()) {
          BI->unbundleFromPred();
          for (MachineOperand &MO : BI->operands()) {
            if (MO.isReg() && MO.isInternalRead())
              MO.setIsInternalRead(false);
          }
          getLiteral(*BI, Literals);
          ClauseContent.push_back(&*BI);
        }
        I = BI;
        DeleteMI.eraseFromParent();
      } else {
        getLiteral(*I, Literals);
        ClauseContent.push_back(&*I);
        I++;
      }
      for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
        MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
                                            TII->get(R600::LITERALS));
        if (Literals[i]->isImm()) {
          MILit.addImm(Literals[i]->getImm());
        } else {
          MILit.addGlobalAddress(Literals[i]->getGlobal(),
                                 Literals[i]->getOffset());
        }
        if (i + 1 < e) {
          if (Literals[i + 1]->isImm()) {
            MILit.addImm(Literals[i + 1]->getImm());
          } else {
            MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
                                   Literals[i + 1]->getOffset());
          }
        } else
          MILit.addImm(0);
        ClauseContent.push_back(MILit);
      }
    }
    assert(ClauseContent.size() < 128 && "ALU clause is too big");
    ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
    return ClauseFile(&ClauseHead, std::move(ClauseContent));
  }

  void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
                       const DebugLoc &DL, ClauseFile &Clause,
                       unsigned &CfCount) {
    CounterPropagateAddr(*Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
      BB->splice(InsertPos, BB, Clause.second[i]);
    }
    CfCount += 2 * Clause.second.size();
  }

  void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
                     ClauseFile &Clause, unsigned &CfCount) {
    Clause.first->getOperand(0).setImm(0);
    CounterPropagateAddr(*Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
      BB->splice(InsertPos, BB, Clause.second[i]);
    }
    CfCount += Clause.second.size();
  }

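  // Back-patches the address operand of a control-flow instruction: the
  // now-known target index is added to the placeholder value stored when the
  // instruction was emitted.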
  void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
    MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
  }

  void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
                            unsigned Addr) const {
    for (MachineInstr *MI : MIs) {
      CounterPropagateAddr(*MI, Addr);
    }
  }

public:
  static char ID;

  R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    ST = &MF.getSubtarget<R600Subtarget>();
    MaxFetchInst = ST->getTexVTXClauseSize();
    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();

    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

    CFStack CFStack(ST, MF.getFunction().getCallingConv());
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
         ++MB) {
      MachineBasicBlock &MBB = *MB;
      unsigned CfCount = 0;
      std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
      std::vector<MachineInstr *> IfThenElseStack;
      if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
                getHWInstrDesc(CF_CALL_FS));
        CfCount++;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;

      // Lower each pseudo control flow instruction to its native equivalent.
      // CfCount is the index of the next native CF instruction and is used to
      // compute jump targets.
      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E;) {
        if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
          LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          LastAlu.back() = nullptr;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        if (MI->getOpcode() != R600::ENDIF)
          LastAlu.back() = nullptr;
        if (MI->getOpcode() == R600::CF_ALU)
          LastAlu.back() = &*MI;
        I++;
        bool RequiresWorkAround =
            CFStack.requiresWorkAroundForInst(MI->getOpcode());
        switch (MI->getOpcode()) {
        case R600::CF_ALU_PUSH_BEFORE:
          if (RequiresWorkAround) {
            LLVM_DEBUG(dbgs()
                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
                .addImm(CfCount + 1)
                .addImm(1);
            MI->setDesc(TII->get(R600::CF_ALU));
            CfCount++;
            CFStack.pushBranch(R600::CF_PUSH_EG);
          } else
            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
          LLVM_FALLTHROUGH;
        case R600::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
        case R600::WHILELOOP: {
          CFStack.pushLoop();
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_WHILE_LOOP))
                                  .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(std::move(Pair));
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDLOOP: {
          CFStack.popLoop();
          std::pair<unsigned, std::set<MachineInstr *>> Pair =
              std::move(LoopStack.back());
          LoopStack.pop_back();
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 1);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
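        // Structured if/else/endif: IF_PREDICATE_SET becomes a CF_JUMP whose
        // target is not known yet, so the jump is parked on IfThenElseStack
        // and patched via CounterPropagateAddr() when the matching ELSE or
        // ENDIF is reached.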
        case R600::IF_PREDICATE_SET: {
          LastAlu.push_back(nullptr);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_JUMP))
                                  .addImm(0)
                                  .addImm(0);
          IfThenElseStack.push_back(MIb);
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ELSE: {
          MachineInstr *JumpInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*JumpInst, CfCount);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_ELSE))
                                  .addImm(0)
                                  .addImm(0);
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          IfThenElseStack.push_back(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDIF: {
          CFStack.popBranch();
          if (LastAlu.back()) {
            ToPopAfter.push_back(LastAlu.back());
          } else {
            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                        getHWInstrDesc(CF_POP))
                                    .addImm(CfCount + 1)
                                    .addImm(1);
            (void)MIb;
            LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
            CfCount++;
          }

          MachineInstr *IfOrElseInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*IfOrElseInst, CfCount);
          IfOrElseInst->getOperand(1).setImm(1);
          LastAlu.pop_back();
          MI->eraseFromParent();
          break;
        }
        case R600::BREAK: {
          CfCount++;
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_LOOP_BREAK))
                                  .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          break;
        }
        case R600::CONTINUE: {
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_LOOP_CONTINUE))
                                  .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::RETURN: {
          DebugLoc DL = MBB.findDebugLoc(MI);
          BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
          CfCount++;
          if (CfCount % 2) {
            BuildMI(MBB, I, DL, TII->get(R600::PAD));
            CfCount++;
          }
          MI->eraseFromParent();
          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
            EmitFetchClause(I, DL, FetchClauses[i], CfCount);
          for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
            EmitALUClause(I, DL, AluClauses[i], CfCount);
          break;
        }
        default:
          if (TII->isExport(MI->getOpcode())) {
            LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
            CfCount++;
          }
          break;
        }
      }
      // Rewrite the CF_ALU instructions recorded in ToPopAfter as
      // CF_ALU_POP_AFTER, so the stack pop is folded into the preceding ALU
      // clause instead of emitting a separate POP.
      for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
        MachineInstr *Alu = ToPopAfter[i];
        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
                TII->get(R600::CF_ALU_POP_AFTER))
            .addImm(Alu->getOperand(0).getImm())
            .addImm(Alu->getOperand(1).getImm())
            .addImm(Alu->getOperand(2).getImm())
            .addImm(Alu->getOperand(3).getImm())
            .addImm(Alu->getOperand(4).getImm())
            .addImm(Alu->getOperand(5).getImm())
            .addImm(Alu->getOperand(6).getImm())
            .addImm(Alu->getOperand(7).getImm())
            .addImm(Alu->getOperand(8).getImm());
        Alu->eraseFromParent();
      }
      MFI->CFStackSize = CFStack.MaxStackSize;
    }

    return false;
  }

  StringRef getPassName() const override {
    return "R600 Control Flow Finalizer Pass";
  }
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
                      "R600 Control Flow Finalizer", false, false)
INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
                    "R600 Control Flow Finalizer", false, false)

char R600ControlFlowFinalizer::ID = 0;

char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;

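/// Factory function: returns a fresh instance of the pass for the pass
/// manager.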
FunctionPass *llvm::createR600ControlFlowFinalizer() {
  return new R600ControlFlowFinalizer();
}