1 //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
/// This pass turns all control flow pseudo instructions into native ones,
/// computing their addresses on the fly; it also sets STACK_SIZE info.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "AMDGPU.h"
16 #include "AMDGPUSubtarget.h"
17 #include "R600MachineFunctionInfo.h"
18 #include <set>
19 
20 using namespace llvm;
21 
22 #define DEBUG_TYPE "r600cf"
23 
24 namespace {
25 
26 struct CFStack {
27   enum StackItem {
28     ENTRY = 0,
29     SUB_ENTRY = 1,
30     FIRST_NON_WQM_PUSH = 2,
31     FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
32   };
33 
34   const R600Subtarget *ST;
35   std::vector<StackItem> BranchStack;
36   std::vector<StackItem> LoopStack;
37   unsigned MaxStackSize;
38   unsigned CurrentEntries = 0;
39   unsigned CurrentSubEntries = 0;
40 
41   CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
42       // We need to reserve a stack entry for CALL_FS in vertex shaders.
43       MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
44 
45   unsigned getLoopDepth();
46   bool branchStackContains(CFStack::StackItem);
47   bool requiresWorkAroundForInst(unsigned Opcode);
48   unsigned getSubEntrySize(CFStack::StackItem Item);
49   void updateMaxStackSize();
50   void pushBranch(unsigned Opcode, bool isWQM = false);
51   void pushLoop();
52   void popBranch();
53   void popLoop();
54 };
55 
56 unsigned CFStack::getLoopDepth() {
57   return LoopStack.size();
58 }
59 
60 bool CFStack::branchStackContains(CFStack::StackItem Item) {
61   return llvm::is_contained(BranchStack, Item);
62 }
63 
64 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
65   if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
66       getLoopDepth() > 1)
67     return true;
68 
69   if (!ST->hasCFAluBug())
70     return false;
71 
72   switch(Opcode) {
73   default: return false;
74   case R600::CF_ALU_PUSH_BEFORE:
75   case R600::CF_ALU_ELSE_AFTER:
76   case R600::CF_ALU_BREAK:
77   case R600::CF_ALU_CONTINUE:
78     if (CurrentSubEntries == 0)
79       return false;
80     if (ST->getWavefrontSize() == 64) {
81       // We are being conservative here.  We only require this work-around if
82       // CurrentSubEntries > 3 &&
83       // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
84       //
85       // We have to be conservative, because we don't know for certain that
86       // our stack allocation algorithm for Evergreen/NI is correct.  Applying this
87       // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
88       // resources without any problems.
89       return CurrentSubEntries > 3;
90     } else {
91       assert(ST->getWavefrontSize() == 32);
92       // We are being conservative here.  We only require the work-around if
93       // CurrentSubEntries > 7 &&
94       // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
95       // See the comment on the wavefront size == 64 case for why we are
96       // being conservative.
97       return CurrentSubEntries > 7;
98     }
99   }
100 }
101 
102 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
103   switch(Item) {
104   default:
105     return 0;
106   case CFStack::FIRST_NON_WQM_PUSH:
107   assert(!ST->hasCaymanISA());
108   if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
109     // +1 For the push operation.
110     // +2 Extra space required.
111     return 3;
112   } else {
113     // Some documentation says that this is not necessary on Evergreen,
114     // but experimentation has show that we need to allocate 1 extra
115     // sub-entry for the first non-WQM push.
116     // +1 For the push operation.
117     // +1 Extra space required.
118     return 2;
119   }
120   case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
121     assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
122     // +1 For the push operation.
123     // +1 Extra space required.
124     return 2;
125   case CFStack::SUB_ENTRY:
126     return 1;
127   }
128 }
129 
130 void CFStack::updateMaxStackSize() {
131   unsigned CurrentStackSize = CurrentEntries + divideCeil(CurrentSubEntries, 4);
132   MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
133 }
134 
135 void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
136   CFStack::StackItem Item = CFStack::ENTRY;
137   switch(Opcode) {
138   case R600::CF_PUSH_EG:
139   case R600::CF_ALU_PUSH_BEFORE:
140     if (!isWQM) {
141       if (!ST->hasCaymanISA() &&
142           !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
143         Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
144                                              // See comment in
145                                              // CFStack::getSubEntrySize()
146       else if (CurrentEntries > 0 &&
147                ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
148                !ST->hasCaymanISA() &&
149                !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
150         Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
151       else
152         Item = CFStack::SUB_ENTRY;
153     } else
154       Item = CFStack::ENTRY;
155     break;
156   }
157   BranchStack.push_back(Item);
158   if (Item == CFStack::ENTRY)
159     CurrentEntries++;
160   else
161     CurrentSubEntries += getSubEntrySize(Item);
162   updateMaxStackSize();
163 }
164 
165 void CFStack::pushLoop() {
166   LoopStack.push_back(CFStack::ENTRY);
167   CurrentEntries++;
168   updateMaxStackSize();
169 }
170 
171 void CFStack::popBranch() {
172   CFStack::StackItem Top = BranchStack.back();
173   if (Top == CFStack::ENTRY)
174     CurrentEntries--;
175   else
176     CurrentSubEntries-= getSubEntrySize(Top);
177   BranchStack.pop_back();
178 }
179 
180 void CFStack::popLoop() {
181   CurrentEntries--;
182   LoopStack.pop_back();
183 }
184 
185 class R600ControlFlowFinalizer : public MachineFunctionPass {
186 private:
187   using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
188 
  /// Generation-independent names for the native control-flow instructions
  /// this pass emits. getHWInstrDesc() resolves each one to the concrete
  /// opcode for the current subtarget.
  enum ControlFlowInstruction {
    CF_TC,
    CF_VC,
    CF_CALL_FS,
    CF_WHILE_LOOP,
    CF_END_LOOP,
    CF_LOOP_BREAK,
    CF_LOOP_CONTINUE,
    CF_JUMP,
    CF_ELSE,
    CF_POP,
    CF_END
  };
202 
203   const R600InstrInfo *TII = nullptr;
204   const R600RegisterInfo *TRI = nullptr;
205   unsigned MaxFetchInst;
206   const R600Subtarget *ST = nullptr;
207 
208   bool IsTrivialInst(MachineInstr &MI) const {
209     switch (MI.getOpcode()) {
210     case R600::KILL:
211     case R600::RETURN:
212       return true;
213     default:
214       return false;
215     }
216   }
217 
218   const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
219     unsigned Opcode = 0;
220     bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
221     switch (CFI) {
222     case CF_TC:
223       Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
224       break;
225     case CF_VC:
226       Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
227       break;
228     case CF_CALL_FS:
229       Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
230       break;
231     case CF_WHILE_LOOP:
232       Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
233       break;
234     case CF_END_LOOP:
235       Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
236       break;
237     case CF_LOOP_BREAK:
238       Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
239       break;
240     case CF_LOOP_CONTINUE:
241       Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
242       break;
243     case CF_JUMP:
244       Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
245       break;
246     case CF_ELSE:
247       Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
248       break;
249     case CF_POP:
250       Opcode = isEg ? R600::POP_EG : R600::POP_R600;
251       break;
252     case CF_END:
253       if (ST->hasCaymanISA()) {
254         Opcode = R600::CF_END_CM;
255         break;
256       }
257       Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
258       break;
259     }
260     assert (Opcode && "No opcode selected");
261     return TII->get(Opcode);
262   }
263 
264   bool isCompatibleWithClause(const MachineInstr &MI,
265                               std::set<unsigned> &DstRegs) const {
266     unsigned DstMI, SrcMI;
267     for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
268                                           E = MI.operands_end();
269          I != E; ++I) {
270       const MachineOperand &MO = *I;
271       if (!MO.isReg())
272         continue;
273       if (MO.isDef()) {
274         Register Reg = MO.getReg();
275         if (R600::R600_Reg128RegClass.contains(Reg))
276           DstMI = Reg;
277         else
278           DstMI = TRI->getMatchingSuperReg(Reg,
279               R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
280               &R600::R600_Reg128RegClass);
281       }
282       if (MO.isUse()) {
283         Register Reg = MO.getReg();
284         if (R600::R600_Reg128RegClass.contains(Reg))
285           SrcMI = Reg;
286         else
287           SrcMI = TRI->getMatchingSuperReg(Reg,
288               R600RegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
289               &R600::R600_Reg128RegClass);
290       }
291     }
292     if ((DstRegs.find(SrcMI) == DstRegs.end())) {
293       DstRegs.insert(DstMI);
294       return true;
295     } else
296       return false;
297   }
298 
299   ClauseFile
300   MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
301       const {
302     MachineBasicBlock::iterator ClauseHead = I;
303     std::vector<MachineInstr *> ClauseContent;
304     unsigned AluInstCount = 0;
305     bool IsTex = TII->usesTextureCache(*ClauseHead);
306     std::set<unsigned> DstRegs;
307     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
308       if (IsTrivialInst(*I))
309         continue;
310       if (AluInstCount >= MaxFetchInst)
311         break;
312       if ((IsTex && !TII->usesTextureCache(*I)) ||
313           (!IsTex && !TII->usesVertexCache(*I)))
314         break;
315       if (!isCompatibleWithClause(*I, DstRegs))
316         break;
317       AluInstCount ++;
318       ClauseContent.push_back(&*I);
319     }
320     MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
321         getHWInstrDesc(IsTex?CF_TC:CF_VC))
322         .addImm(0) // ADDR
323         .addImm(AluInstCount - 1); // COUNT
324     return ClauseFile(MIb, std::move(ClauseContent));
325   }
326 
327   void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
328     static const unsigned LiteralRegs[] = {
329       R600::ALU_LITERAL_X,
330       R600::ALU_LITERAL_Y,
331       R600::ALU_LITERAL_Z,
332       R600::ALU_LITERAL_W
333     };
334     const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
335         TII->getSrcs(MI);
336     for (const auto &Src:Srcs) {
337       if (Src.first->getReg() != R600::ALU_LITERAL_X)
338         continue;
339       int64_t Imm = Src.second;
340       std::vector<MachineOperand *>::iterator It =
341           llvm::find_if(Lits, [&](MachineOperand *val) {
342             return val->isImm() && (val->getImm() == Imm);
343           });
344 
345       // Get corresponding Operand
346       MachineOperand &Operand = MI.getOperand(
347           TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
348 
349       if (It != Lits.end()) {
350         // Reuse existing literal reg
351         unsigned Index = It - Lits.begin();
352         Src.first->setReg(LiteralRegs[Index]);
353       } else {
354         // Allocate new literal reg
355         assert(Lits.size() < 4 && "Too many literals in Instruction Group");
356         Src.first->setReg(LiteralRegs[Lits.size()]);
357         Lits.push_back(&Operand);
358       }
359     }
360   }
361 
362   MachineBasicBlock::iterator insertLiterals(
363       MachineBasicBlock::iterator InsertPos,
364       const std::vector<unsigned> &Literals) const {
365     MachineBasicBlock *MBB = InsertPos->getParent();
366     for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
367       unsigned LiteralPair0 = Literals[i];
368       unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
369       InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
370           TII->get(R600::LITERALS))
371           .addImm(LiteralPair0)
372           .addImm(LiteralPair1);
373     }
374     return InsertPos;
375   }
376 
377   ClauseFile
378   MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
379       const {
380     MachineInstr &ClauseHead = *I;
381     std::vector<MachineInstr *> ClauseContent;
382     I++;
383     for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
384       if (IsTrivialInst(*I)) {
385         ++I;
386         continue;
387       }
388       if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
389         break;
390       std::vector<MachineOperand *>Literals;
391       if (I->isBundle()) {
392         MachineInstr &DeleteMI = *I;
393         MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
394         while (++BI != E && BI->isBundledWithPred()) {
395           BI->unbundleFromPred();
396           for (MachineOperand &MO : BI->operands()) {
397             if (MO.isReg() && MO.isInternalRead())
398               MO.setIsInternalRead(false);
399           }
400           getLiteral(*BI, Literals);
401           ClauseContent.push_back(&*BI);
402         }
403         I = BI;
404         DeleteMI.eraseFromParent();
405       } else {
406         getLiteral(*I, Literals);
407         ClauseContent.push_back(&*I);
408         I++;
409       }
410       for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
411         MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
412             TII->get(R600::LITERALS));
413         if (Literals[i]->isImm()) {
414             MILit.addImm(Literals[i]->getImm());
415         } else {
416             MILit.addGlobalAddress(Literals[i]->getGlobal(),
417                                    Literals[i]->getOffset());
418         }
419         if (i + 1 < e) {
420           if (Literals[i + 1]->isImm()) {
421             MILit.addImm(Literals[i + 1]->getImm());
422           } else {
423             MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
424                                    Literals[i + 1]->getOffset());
425           }
426         } else
427           MILit.addImm(0);
428         ClauseContent.push_back(MILit);
429       }
430     }
431     assert(ClauseContent.size() < 128 && "ALU clause is too big");
432     ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
433     return ClauseFile(&ClauseHead, std::move(ClauseContent));
434   }
435 
436   void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
437                        const DebugLoc &DL, ClauseFile &Clause,
438                        unsigned &CfCount) {
439     CounterPropagateAddr(*Clause.first, CfCount);
440     MachineBasicBlock *BB = Clause.first->getParent();
441     BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
442     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
443       BB->splice(InsertPos, BB, Clause.second[i]);
444     }
445     CfCount += 2 * Clause.second.size();
446   }
447 
448   void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
449                      ClauseFile &Clause, unsigned &CfCount) {
450     Clause.first->getOperand(0).setImm(0);
451     CounterPropagateAddr(*Clause.first, CfCount);
452     MachineBasicBlock *BB = Clause.first->getParent();
453     BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
454     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
455       BB->splice(InsertPos, BB, Clause.second[i]);
456     }
457     CfCount += Clause.second.size();
458   }
459 
460   void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
461     MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
462   }
463   void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
464                             unsigned Addr) const {
465     for (MachineInstr *MI : MIs) {
466       CounterPropagateAddr(*MI, Addr);
467     }
468   }
469 
470 public:
471   static char ID;
472 
  /// Constructs the pass, handing its static ID to the MachineFunctionPass
  /// base class.
  R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
474 
475   bool runOnMachineFunction(MachineFunction &MF) override {
476     ST = &MF.getSubtarget<R600Subtarget>();
477     MaxFetchInst = ST->getTexVTXClauseSize();
478     TII = ST->getInstrInfo();
479     TRI = ST->getRegisterInfo();
480 
481     R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
482 
483     CFStack CFStack(ST, MF.getFunction().getCallingConv());
484     for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
485         ++MB) {
486       MachineBasicBlock &MBB = *MB;
487       unsigned CfCount = 0;
488       std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
489       std::vector<MachineInstr * > IfThenElseStack;
490       if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
491         BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
492             getHWInstrDesc(CF_CALL_FS));
493         CfCount++;
494       }
495       std::vector<ClauseFile> FetchClauses, AluClauses;
496       std::vector<MachineInstr *> LastAlu(1);
497       std::vector<MachineInstr *> ToPopAfter;
498 
499       for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
500           I != E;) {
501         if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
502           LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
503           FetchClauses.push_back(MakeFetchClause(MBB, I));
504           CfCount++;
505           LastAlu.back() = nullptr;
506           continue;
507         }
508 
509         MachineBasicBlock::iterator MI = I;
510         if (MI->getOpcode() != R600::ENDIF)
511           LastAlu.back() = nullptr;
512         if (MI->getOpcode() == R600::CF_ALU)
513           LastAlu.back() = &*MI;
514         I++;
515         bool RequiresWorkAround =
516             CFStack.requiresWorkAroundForInst(MI->getOpcode());
517         switch (MI->getOpcode()) {
518         case R600::CF_ALU_PUSH_BEFORE:
519           if (RequiresWorkAround) {
520             LLVM_DEBUG(dbgs()
521                        << "Applying bug work-around for ALU_PUSH_BEFORE\n");
522             BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
523                 .addImm(CfCount + 1)
524                 .addImm(1);
525             MI->setDesc(TII->get(R600::CF_ALU));
526             CfCount++;
527             CFStack.pushBranch(R600::CF_PUSH_EG);
528           } else
529             CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
530           LLVM_FALLTHROUGH;
531         case R600::CF_ALU:
532           I = MI;
533           AluClauses.push_back(MakeALUClause(MBB, I));
534           LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
535           CfCount++;
536           break;
537         case R600::WHILELOOP: {
538           CFStack.pushLoop();
539           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
540               getHWInstrDesc(CF_WHILE_LOOP))
541               .addImm(1);
542           std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
543               std::set<MachineInstr *>());
544           Pair.second.insert(MIb);
545           LoopStack.push_back(std::move(Pair));
546           MI->eraseFromParent();
547           CfCount++;
548           break;
549         }
550         case R600::ENDLOOP: {
551           CFStack.popLoop();
552           std::pair<unsigned, std::set<MachineInstr *>> Pair =
553               std::move(LoopStack.back());
554           LoopStack.pop_back();
555           CounterPropagateAddr(Pair.second, CfCount);
556           BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
557               .addImm(Pair.first + 1);
558           MI->eraseFromParent();
559           CfCount++;
560           break;
561         }
562         case R600::IF_PREDICATE_SET: {
563           LastAlu.push_back(nullptr);
564           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
565               getHWInstrDesc(CF_JUMP))
566               .addImm(0)
567               .addImm(0);
568           IfThenElseStack.push_back(MIb);
569           LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
570           MI->eraseFromParent();
571           CfCount++;
572           break;
573         }
574         case R600::ELSE: {
575           MachineInstr * JumpInst = IfThenElseStack.back();
576           IfThenElseStack.pop_back();
577           CounterPropagateAddr(*JumpInst, CfCount);
578           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
579               getHWInstrDesc(CF_ELSE))
580               .addImm(0)
581               .addImm(0);
582           LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
583           IfThenElseStack.push_back(MIb);
584           MI->eraseFromParent();
585           CfCount++;
586           break;
587         }
588         case R600::ENDIF: {
589           CFStack.popBranch();
590           if (LastAlu.back()) {
591             ToPopAfter.push_back(LastAlu.back());
592           } else {
593             MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
594                 getHWInstrDesc(CF_POP))
595                 .addImm(CfCount + 1)
596                 .addImm(1);
597             (void)MIb;
598             LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
599             CfCount++;
600           }
601 
602           MachineInstr *IfOrElseInst = IfThenElseStack.back();
603           IfThenElseStack.pop_back();
604           CounterPropagateAddr(*IfOrElseInst, CfCount);
605           IfOrElseInst->getOperand(1).setImm(1);
606           LastAlu.pop_back();
607           MI->eraseFromParent();
608           break;
609         }
610         case R600::BREAK: {
611           CfCount ++;
612           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
613               getHWInstrDesc(CF_LOOP_BREAK))
614               .addImm(0);
615           LoopStack.back().second.insert(MIb);
616           MI->eraseFromParent();
617           break;
618         }
619         case R600::CONTINUE: {
620           MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
621               getHWInstrDesc(CF_LOOP_CONTINUE))
622               .addImm(0);
623           LoopStack.back().second.insert(MIb);
624           MI->eraseFromParent();
625           CfCount++;
626           break;
627         }
628         case R600::RETURN: {
629           DebugLoc DL = MBB.findDebugLoc(MI);
630           BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
631           CfCount++;
632           if (CfCount % 2) {
633             BuildMI(MBB, I, DL, TII->get(R600::PAD));
634             CfCount++;
635           }
636           MI->eraseFromParent();
637           for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
638             EmitFetchClause(I, DL, FetchClauses[i], CfCount);
639           for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
640             EmitALUClause(I, DL, AluClauses[i], CfCount);
641           break;
642         }
643         default:
644           if (TII->isExport(MI->getOpcode())) {
645             LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
646             CfCount++;
647           }
648           break;
649         }
650       }
651       for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
652         MachineInstr *Alu = ToPopAfter[i];
653         BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
654             TII->get(R600::CF_ALU_POP_AFTER))
655             .addImm(Alu->getOperand(0).getImm())
656             .addImm(Alu->getOperand(1).getImm())
657             .addImm(Alu->getOperand(2).getImm())
658             .addImm(Alu->getOperand(3).getImm())
659             .addImm(Alu->getOperand(4).getImm())
660             .addImm(Alu->getOperand(5).getImm())
661             .addImm(Alu->getOperand(6).getImm())
662             .addImm(Alu->getOperand(7).getImm())
663             .addImm(Alu->getOperand(8).getImm());
664         Alu->eraseFromParent();
665       }
666       MFI->CFStackSize = CFStack.MaxStackSize;
667     }
668 
669     return false;
670   }
671 
672   StringRef getPassName() const override {
673     return "R600 Control Flow Finalizer Pass";
674   }
675 };
676 
677 } // end anonymous namespace
678 
// Pass registration: the pass is registered under DEBUG_TYPE ("r600cf") with
// the description "R600 Control Flow Finalizer"; both trailing flags are
// false. No dependencies are declared between BEGIN and END.
INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
                     "R600 Control Flow Finalizer", false, false)
INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
                    "R600 Control Flow Finalizer", false, false)

// Definition of the static pass identifier declared in the class.
char R600ControlFlowFinalizer::ID = 0;

// Reference to the pass identifier, defined in the llvm namespace so that it
// can be named from outside this file's anonymous namespace.
char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
687 
/// Returns a newly allocated instance of the R600 control flow finalizer
/// pass.
FunctionPass *llvm::createR600ControlFlowFinalizer() {
  return new R600ControlFlowFinalizer();
}
691