//===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass compute turns all control flow pseudo instructions into native one
/// computing their address on the fly; it also sets STACK_SIZE info.
//
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "R600MachineFunctionInfo.h"
#include <set>

using namespace llvm;

#define DEBUG_TYPE "r600cf"

namespace {

/// Models the hardware control-flow stack while CF pseudo instructions are
/// lowered, so the maximum depth can be recorded as the function's
/// STACK_SIZE (see R600MachineFunctionInfo::CFStackSize at the end of
/// runOnMachineFunction).
struct CFStack {
  /// Kind of item occupying stack space. ENTRY is a full stack entry; the
  /// other kinds are "sub-entries" whose cost in sub-entry units is returned
  /// by getSubEntrySize(). Four sub-entries pack into one full entry (see
  /// updateMaxStackSize()).
  enum StackItem {
    ENTRY = 0,
    SUB_ENTRY = 1,
    FIRST_NON_WQM_PUSH = 2,
    FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
  };

  const R600Subtarget *ST;
  std::vector<StackItem> BranchStack; // Currently open branch scopes.
  std::vector<StackItem> LoopStack;   // Currently open loop scopes.
  unsigned MaxStackSize;              // High-water mark, in full entries.
  unsigned CurrentEntries = 0;        // Full entries currently in use.
  unsigned CurrentSubEntries = 0;     // Sub-entries currently in use.

  CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
      // We need to reserve a stack entry for CALL_FS in vertex shaders.
      MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}

  unsigned getLoopDepth();
  bool branchStackContains(CFStack::StackItem);
  bool requiresWorkAroundForInst(unsigned Opcode);
  unsigned getSubEntrySize(CFStack::StackItem Item);
  void updateMaxStackSize();
  void pushBranch(unsigned Opcode, bool isWQM = false);
  void pushLoop();
  void popBranch();
  void popLoop();
};

/// \returns the number of loop scopes currently open.
unsigned CFStack::getLoopDepth() {
  return LoopStack.size();
}

/// Linear scan of the open branch scopes for an item of kind \p Item.
bool CFStack::branchStackContains(CFStack::StackItem Item) {
  for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
       E = BranchStack.end(); I != E; ++I) {
    if (*I == Item)
      return true;
  }
  return false;
}

/// \returns true if lowering \p Opcode at the current stack depth needs the
/// hardware-bug work-around applied in runOnMachineFunction (an explicit
/// CF_PUSH_EG emitted before the ALU clause).
bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
  // Cayman: ALU_PUSH_BEFORE inside nested loops always needs the work-around.
  if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
      getLoopDepth() > 1)
    return true;

  if (!ST->hasCFAluBug())
    return false;

  switch (Opcode) {
  default: return false;
  case R600::CF_ALU_PUSH_BEFORE:
  case R600::CF_ALU_ELSE_AFTER:
  case R600::CF_ALU_BREAK:
  case R600::CF_ALU_CONTINUE:
    if (CurrentSubEntries == 0)
      return false;
    if (ST->getWavefrontSize() == 64) {
      // We are being conservative here.  We only require this work-around if
      // CurrentSubEntries > 3 &&
      // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
      //
      // We have to be conservative, because we don't know for certain that
      // our stack allocation algorithm for Evergreen/NI is correct.  Applying this
      // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
      // resources without any problems.
      return CurrentSubEntries > 3;
    } else {
      assert(ST->getWavefrontSize() == 32);
      // We are being conservative here.  We only require the work-around if
      // CurrentSubEntries > 7 &&
      // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
      // See the comment on the wavefront size == 64 case for why we are
      // being conservative.
      return CurrentSubEntries > 7;
    }
  }
}

/// \returns the cost of \p Item in sub-entry units (0 for full entries,
/// which are accounted separately in CurrentEntries).
unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
  switch (Item) {
  default:
    return 0;
  case CFStack::FIRST_NON_WQM_PUSH:
    assert(!ST->hasCaymanISA());
    if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
      // +1 For the push operation.
      // +2 Extra space required.
      return 3;
    } else {
      // Some documentation says that this is not necessary on Evergreen,
      // but experimentation has show that we need to allocate 1 extra
      // sub-entry for the first non-WQM push.
      // +1 For the push operation.
      // +1 Extra space required.
      return 2;
    }
  case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
    assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    // +1 For the push operation.
    // +1 Extra space required.
    return 2;
  case CFStack::SUB_ENTRY:
    return 1;
  }
}

/// Recompute the current depth (four sub-entries round up to one full entry)
/// and raise the high-water mark if needed.
void CFStack::updateMaxStackSize() {
  unsigned CurrentStackSize = CurrentEntries + divideCeil(CurrentSubEntries, 4);
  MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
}

/// Record a branch push for \p Opcode. Non-WQM pushes are charged as
/// sub-entries, the first one at a higher cost (see getSubEntrySize()).
void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
  CFStack::StackItem Item = CFStack::ENTRY;
  switch (Opcode) {
  case R600::CF_PUSH_EG:
  case R600::CF_ALU_PUSH_BEFORE:
    if (!isWQM) {
      if (!ST->hasCaymanISA() &&
          !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
        Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
                                             // See comment in
                                             // CFStack::getSubEntrySize()
      else if (CurrentEntries > 0 &&
               ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
               !ST->hasCaymanISA() &&
               !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
        Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
      else
        Item = CFStack::SUB_ENTRY;
    } else
      Item = CFStack::ENTRY;
    break;
  }
  BranchStack.push_back(Item);
  if (Item == CFStack::ENTRY)
    CurrentEntries++;
  else
    CurrentSubEntries += getSubEntrySize(Item);
  updateMaxStackSize();
}

/// Loops always consume one full entry.
void CFStack::pushLoop() {
  LoopStack.push_back(CFStack::ENTRY);
  CurrentEntries++;
  updateMaxStackSize();
}

/// Undo the matching pushBranch(): release the same entry/sub-entry cost.
void CFStack::popBranch() {
  CFStack::StackItem Top = BranchStack.back();
  if (Top == CFStack::ENTRY)
    CurrentEntries--;
  else
    CurrentSubEntries -= getSubEntrySize(Top);
  BranchStack.pop_back();
}

void CFStack::popLoop() {
  CurrentEntries--;
  LoopStack.pop_back();
}

/// Machine-function pass that replaces control-flow pseudo instructions
/// (WHILELOOP, ENDLOOP, IF_PREDICATE_SET, ELSE, ENDIF, BREAK, CONTINUE,
/// RETURN) with native generation-specific CF instructions, packs fetch and
/// ALU instructions into clauses, back-patches CF addresses, and records the
/// required stack size.
class R600ControlFlowFinalizer : public MachineFunctionPass {
private:
  // A clause header instruction paired with the instructions it covers.
  using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;

  // Generation-independent CF instruction kinds; mapped to real opcodes by
  // getHWInstrDesc().
  enum ControlFlowInstruction {
    CF_TC,
    CF_VC,
    CF_CALL_FS,
    CF_WHILE_LOOP,
    CF_END_LOOP,
    CF_LOOP_BREAK,
    CF_LOOP_CONTINUE,
    CF_JUMP,
    CF_ELSE,
    CF_POP,
    CF_END
  };

  const R600InstrInfo *TII = nullptr;
  const R600RegisterInfo *TRI = nullptr;
  unsigned MaxFetchInst; // Max instructions per fetch clause (subtarget limit).
  const R600Subtarget *ST = nullptr;

  /// Instructions that take no CF slot and may sit inside a clause.
  bool IsTrivialInst(MachineInstr &MI) const {
    switch (MI.getOpcode()) {
    case R600::KILL:
    case R600::RETURN:
      return true;
    default:
      return false;
    }
  }

  /// Map the generic CF kind \p CFI to the native opcode for the current
  /// subtarget generation (R600/R700 vs. Evergreen+, with a Cayman-specific
  /// CF_END).
  const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
    unsigned Opcode = 0;
    bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    switch (CFI) {
    case CF_TC:
      Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
      break;
    case CF_VC:
      Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
      break;
    case CF_CALL_FS:
      Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
      break;
    case CF_WHILE_LOOP:
      Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
      break;
    case CF_END_LOOP:
      Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
      break;
    case CF_LOOP_BREAK:
      Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
      break;
    case CF_LOOP_CONTINUE:
      Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
      break;
    case CF_JUMP:
      Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
      break;
    case CF_ELSE:
      Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
      break;
    case CF_POP:
      Opcode = isEg ? R600::POP_EG : R600::POP_R600;
      break;
    case CF_END:
      if (ST->hasCaymanISA()) {
        Opcode = R600::CF_END_CM;
        break;
      }
      Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
      break;
    }
    assert (Opcode && "No opcode selected");
    return TII->get(Opcode);
  }

  /// \returns true if \p MI may join the current fetch clause, i.e. its
  /// source 128-bit register was not written earlier in the clause. On
  /// success the instruction's destination super-register is added to
  /// \p DstRegs. Registers outside R600_Reg128 are widened to their
  /// containing 128-bit super-register for the comparison.
  ///
  /// NOTE(review): DstMI/SrcMI are assigned only when a def/use register
  /// operand is seen; if \p MI lacked one of them the values below would be
  /// read uninitialized — presumably every clause candidate has both a def
  /// and a use. Confirm.
  bool isCompatibleWithClause(const MachineInstr &MI,
                              std::set<unsigned> &DstRegs) const {
    unsigned DstMI, SrcMI;
    for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
                                          E = MI.operands_end();
         I != E; ++I) {
      const MachineOperand &MO = *I;
      if (!MO.isReg())
        continue;
      if (MO.isDef()) {
        Register Reg = MO.getReg();
        if (R600::R600_Reg128RegClass.contains(Reg))
          DstMI = Reg;
        else
          DstMI = TRI->getMatchingSuperReg(Reg,
              R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &R600::R600_Reg128RegClass);
      }
      if (MO.isUse()) {
        Register Reg = MO.getReg();
        if (R600::R600_Reg128RegClass.contains(Reg))
          SrcMI = Reg;
        else
          SrcMI = TRI->getMatchingSuperReg(Reg,
              R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
              &R600::R600_Reg128RegClass);
      }
    }
    if ((DstRegs.find(SrcMI) == DstRegs.end())) {
      DstRegs.insert(DstMI);
      return true;
    } else
      return false;
  }

  /// Collect a run of texture- or vertex-fetch instructions starting at \p I
  /// (up to MaxFetchInst, stopping at the first incompatible instruction)
  /// and emit the corresponding CF_TC/CF_VC clause header before them.
  /// \p I is advanced past the clause; trivial instructions are skipped but
  /// not included.
  ClauseFile
  MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineBasicBlock::iterator ClauseHead = I;
    std::vector<MachineInstr *> ClauseContent;
    unsigned AluInstCount = 0;
    bool IsTex = TII->usesTextureCache(*ClauseHead);
    std::set<unsigned> DstRegs;
    for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
      if (IsTrivialInst(*I))
        continue;
      if (AluInstCount >= MaxFetchInst)
        break;
      // A clause is homogeneous: all texture-cache or all vertex-cache.
      if ((IsTex && !TII->usesTextureCache(*I)) ||
          (!IsTex && !TII->usesVertexCache(*I)))
        break;
      if (!isCompatibleWithClause(*I, DstRegs))
        break;
      AluInstCount++;
      ClauseContent.push_back(&*I);
    }
    MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
                                getHWInstrDesc(IsTex ? CF_TC : CF_VC))
        .addImm(0)                  // ADDR (patched later by EmitFetchClause)
        .addImm(AluInstCount - 1);  // COUNT
    return ClauseFile(MIb, std::move(ClauseContent));
  }

  /// Rewrite every ALU_LITERAL_X source of \p MI to one of the four literal
  /// channel registers (X/Y/Z/W), reusing an entry of \p Lits when the same
  /// immediate is already assigned and appending the instruction's literal
  /// operand to \p Lits otherwise. At most 4 distinct literals fit in one
  /// instruction group (asserted).
  void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
    static const unsigned LiteralRegs[] = {
      R600::ALU_LITERAL_X,
      R600::ALU_LITERAL_Y,
      R600::ALU_LITERAL_Z,
      R600::ALU_LITERAL_W
    };
    const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
        TII->getSrcs(MI);
    for (const auto &Src : Srcs) {
      if (Src.first->getReg() != R600::ALU_LITERAL_X)
        continue;
      int64_t Imm = Src.second;
      std::vector<MachineOperand *>::iterator It =
          llvm::find_if(Lits, [&](MachineOperand *val) {
            return val->isImm() && (val->getImm() == Imm);
          });

      // Get corresponding Operand
      MachineOperand &Operand = MI.getOperand(
          TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));

      if (It != Lits.end()) {
        // Reuse existing literal reg
        unsigned Index = It - Lits.begin();
        Src.first->setReg(LiteralRegs[Index]);
      } else {
        // Allocate new literal reg
        assert(Lits.size() < 4 && "Too many literals in Instruction Group");
        Src.first->setReg(LiteralRegs[Lits.size()]);
        Lits.push_back(&Operand);
      }
    }
  }

  /// Emit LITERALS instructions before \p InsertPos, packing the values of
  /// \p Literals two per instruction (odd count is padded with 0).
  MachineBasicBlock::iterator insertLiterals(
      MachineBasicBlock::iterator InsertPos,
      const std::vector<unsigned> &Literals) const {
    MachineBasicBlock *MBB = InsertPos->getParent();
    for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
      unsigned LiteralPair0 = Literals[i];
      unsigned LiteralPair1 = (i + 1 < e) ? Literals[i + 1] : 0;
      InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
                          TII->get(R600::LITERALS))
          .addImm(LiteralPair0)
          .addImm(LiteralPair1);
    }
    return InsertPos;
  }

  /// Collect the ALU instructions following the clause header at \p I into a
  /// ClauseFile. Bundles are flattened (instructions unbundled, internal-read
  /// flags cleared, the BUNDLE marker erased), literal operands are assigned
  /// channel registers via getLiteral(), and LITERALS instructions carrying
  /// the values (immediates or global addresses) are emitted after each
  /// group. The clause header's COUNT operand (operand 7) is set to
  /// size - 1. \p I ends up past the clause.
  ClauseFile
  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineInstr &ClauseHead = *I;
    std::vector<MachineInstr *> ClauseContent;
    I++;
    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
      if (IsTrivialInst(*I)) {
        ++I;
        continue;
      }
      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
        break;
      std::vector<MachineOperand *> Literals;
      if (I->isBundle()) {
        MachineInstr &DeleteMI = *I;
        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
        while (++BI != E && BI->isBundledWithPred()) {
          BI->unbundleFromPred();
          for (MachineOperand &MO : BI->operands()) {
            if (MO.isReg() && MO.isInternalRead())
              MO.setIsInternalRead(false);
          }
          getLiteral(*BI, Literals);
          ClauseContent.push_back(&*BI);
        }
        I = BI;
        DeleteMI.eraseFromParent();
      } else {
        getLiteral(*I, Literals);
        ClauseContent.push_back(&*I);
        I++;
      }
      // Pack literals two per LITERALS instruction; a lone trailing literal
      // is padded with an immediate 0.
      for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
        MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
                                            TII->get(R600::LITERALS));
        if (Literals[i]->isImm()) {
          MILit.addImm(Literals[i]->getImm());
        } else {
          MILit.addGlobalAddress(Literals[i]->getGlobal(),
                                 Literals[i]->getOffset());
        }
        if (i + 1 < e) {
          if (Literals[i + 1]->isImm()) {
            MILit.addImm(Literals[i + 1]->getImm());
          } else {
            MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
                                   Literals[i + 1]->getOffset());
          }
        } else
          MILit.addImm(0);
        ClauseContent.push_back(MILit);
      }
    }
    assert(ClauseContent.size() < 128 && "ALU clause is too big");
    ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
    return ClauseFile(&ClauseHead, std::move(ClauseContent));
  }

  /// Patch the clause header's ADDR with the final \p CfCount, mark the
  /// clause with a FETCH_CLAUSE pseudo, and move its instructions before
  /// \p InsertPos. CfCount is advanced by twice the instruction count
  /// (each fetch instruction appears to occupy two CF-word slots — derived
  /// from the 2* factor here; confirm against the ISA docs).
  void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
                       const DebugLoc &DL, ClauseFile &Clause,
                       unsigned &CfCount) {
    CounterPropagateAddr(*Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
      BB->splice(InsertPos, BB, Clause.second[i]);
    }
    CfCount += 2 * Clause.second.size();
  }

  /// Same as EmitFetchClause for ALU clauses: zero the header's operand 0,
  /// patch its address, emit an ALU_CLAUSE marker, splice the body before
  /// \p InsertPos, and advance CfCount by one slot per instruction.
  void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
                     ClauseFile &Clause, unsigned &CfCount) {
    Clause.first->getOperand(0).setImm(0);
    CounterPropagateAddr(*Clause.first, CfCount);
    MachineBasicBlock *BB = Clause.first->getParent();
    BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
    for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
      BB->splice(InsertPos, BB, Clause.second[i]);
    }
    CfCount += Clause.second.size();
  }

  /// Add \p Addr to the (relative) address already stored in operand 0.
  void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
    MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
  }
  /// Batch form of the above, used for all BREAK/CONTINUE/WHILE_LOOP
  /// instructions of a loop once ENDLOOP's address is known.
  void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
                            unsigned Addr) const {
    for (MachineInstr *MI : MIs) {
      CounterPropagateAddr(*MI, Addr);
    }
  }

public:
  static char ID;

  R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    ST = &MF.getSubtarget<R600Subtarget>();
    MaxFetchInst = ST->getTexVTXClauseSize();
    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();

    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

    CFStack CFStack(ST, MF.getFunction().getCallingConv());
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
         ++MB) {
      MachineBasicBlock &MBB = *MB;
      unsigned CfCount = 0; // CF instruction slots emitted so far in this MBB.
      // Per loop: the WHILE_LOOP address and the set of instructions
      // (WHILE_LOOP/BREAK/CONTINUE) whose addresses get patched at ENDLOOP.
      std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
      std::vector<MachineInstr *> IfThenElseStack; // Open JUMP/ELSE to patch.
      if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
        // Vertex shaders start by calling the fetch shader.
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
                getHWInstrDesc(CF_CALL_FS));
        CfCount++;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      // Last CF_ALU per if/else nesting level; a live entry at ENDIF lets the
      // POP be folded into CF_ALU_POP_AFTER instead of a separate POP.
      std::vector<MachineInstr *> LastAlu(1);
      std::vector<MachineInstr *> ToPopAfter;

      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
           I != E;) {
        if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
          LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          LastAlu.back() = nullptr;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        if (MI->getOpcode() != R600::ENDIF)
          LastAlu.back() = nullptr;
        if (MI->getOpcode() == R600::CF_ALU)
          LastAlu.back() = &*MI;
        I++;
        bool RequiresWorkAround =
            CFStack.requiresWorkAroundForInst(MI->getOpcode());
        switch (MI->getOpcode()) {
        case R600::CF_ALU_PUSH_BEFORE:
          if (RequiresWorkAround) {
            // Split the push out into an explicit CF_PUSH_EG and demote the
            // instruction to a plain CF_ALU (see requiresWorkAroundForInst).
            LLVM_DEBUG(dbgs()
                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
                .addImm(CfCount + 1)
                .addImm(1);
            MI->setDesc(TII->get(R600::CF_ALU));
            CfCount++;
            CFStack.pushBranch(R600::CF_PUSH_EG);
          } else
            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
          LLVM_FALLTHROUGH;
        case R600::CF_ALU:
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
        case R600::WHILELOOP: {
          CFStack.pushLoop();
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_WHILE_LOOP))
              .addImm(1);
          std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(std::move(Pair));
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDLOOP: {
          CFStack.popLoop();
          // Patch every pending loop-control instruction with the address
          // just past the loop.
          std::pair<unsigned, std::set<MachineInstr *>> Pair =
              std::move(LoopStack.back());
          LoopStack.pop_back();
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 1);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::IF_PREDICATE_SET: {
          LastAlu.push_back(nullptr);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_JUMP))
              .addImm(0)   // Target address, patched at ELSE/ENDIF.
              .addImm(0);
          IfThenElseStack.push_back(MIb);
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ELSE: {
          // The pending JUMP now targets here; the new ELSE is patched later.
          MachineInstr *JumpInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*JumpInst, CfCount);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_ELSE))
              .addImm(0)
              .addImm(0);
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          IfThenElseStack.push_back(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDIF: {
          CFStack.popBranch();
          if (LastAlu.back()) {
            // Fold the POP into the preceding CF_ALU (done after the scan).
            ToPopAfter.push_back(LastAlu.back());
          } else {
            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                        getHWInstrDesc(CF_POP))
                .addImm(CfCount + 1)
                .addImm(1);
            (void)MIb;
            LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
            CfCount++;
          }

          MachineInstr *IfOrElseInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*IfOrElseInst, CfCount);
          IfOrElseInst->getOperand(1).setImm(1);
          LastAlu.pop_back();
          MI->eraseFromParent();
          break;
        }
        case R600::BREAK: {
          CfCount++;
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_LOOP_BREAK))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          break;
        }
        case R600::CONTINUE: {
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                                      getHWInstrDesc(CF_LOOP_CONTINUE))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::RETURN: {
          DebugLoc DL = MBB.findDebugLoc(MI);
          BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
          CfCount++;
          // Pad to an even CF instruction count.
          if (CfCount % 2) {
            BuildMI(MBB, I, DL, TII->get(R600::PAD));
            CfCount++;
          }
          MI->eraseFromParent();
          // Now that all CF addresses are final, place the collected clause
          // bodies after the CF program.
          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
            EmitFetchClause(I, DL, FetchClauses[i], CfCount);
          for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
            EmitALUClause(I, DL, AluClauses[i], CfCount);
          break;
        }
        default:
          if (TII->isExport(MI->getOpcode())) {
            LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
            CfCount++;
          }
          break;
        }
      }
      // Replace each CF_ALU saved at ENDIF with CF_ALU_POP_AFTER, copying
      // all nine immediate operands.
      for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
        MachineInstr *Alu = ToPopAfter[i];
        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
                TII->get(R600::CF_ALU_POP_AFTER))
            .addImm(Alu->getOperand(0).getImm())
            .addImm(Alu->getOperand(1).getImm())
            .addImm(Alu->getOperand(2).getImm())
            .addImm(Alu->getOperand(3).getImm())
            .addImm(Alu->getOperand(4).getImm())
            .addImm(Alu->getOperand(5).getImm())
            .addImm(Alu->getOperand(6).getImm())
            .addImm(Alu->getOperand(7).getImm())
            .addImm(Alu->getOperand(8).getImm());
        Alu->eraseFromParent();
      }
      MFI->CFStackSize = CFStack.MaxStackSize;
    }

    return false;
  }

  StringRef getPassName() const override {
    return "R600 Control Flow Finalizer Pass";
  }
};

} // end anonymous namespace

INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
                      "R600 Control Flow Finalizer", false, false)
INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
                    "R600 Control Flow Finalizer", false, false)

char R600ControlFlowFinalizer::ID = 0;

char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;

/// Factory used by the R600 target machine to add this pass to the codegen
/// pipeline.
FunctionPass *llvm::createR600ControlFlowFinalizer() {
  return new R600ControlFlowFinalizer();
}