1 //===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the machine instruction level CFG structurizer pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "SIInstrInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/PostOrderIterator.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/Analysis/CFG.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegionInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/Passes.h"
30 #include "llvm/IR/DebugLoc.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Target/TargetInstrInfo.h"
33 #include "llvm/Target/TargetLowering.h"
34 #include "llvm/Target/TargetSubtargetInfo.h"
35 #include <tuple>
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "amdgpucfgstructurizer"
39 
40 namespace {
41 class PHILinearizeDestIterator;
42 
43 class PHILinearize {
44   friend class PHILinearizeDestIterator;
45 
46 public:
47   typedef std::pair<unsigned, MachineBasicBlock *> PHISourceT;
48 
49 private:
50   typedef DenseSet<PHISourceT> PHISourcesT;
51   typedef struct {
52     unsigned DestReg;
53     DebugLoc DL;
54     PHISourcesT Sources;
55   } PHIInfoElementT;
56   typedef SmallPtrSet<PHIInfoElementT *, 2> PHIInfoT;
57   PHIInfoT PHIInfo;
58 
59   static unsigned phiInfoElementGetDest(PHIInfoElementT *Info);
60   static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef);
61   static DebugLoc phiInfoElementGetDebugLoc(PHIInfoElementT *Info);
62   static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info);
63   static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg,
64                                       MachineBasicBlock *SourceMBB);
65   static void phiInfoElementRemoveSource(PHIInfoElementT *Info,
66                                          unsigned SourceReg,
67                                          MachineBasicBlock *SourceMBB);
68   PHIInfoElementT *findPHIInfoElement(unsigned DestReg);
69   PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg,
70                                                 MachineBasicBlock *SourceMBB);
71 
72 public:
73   bool findSourcesFromMBB(MachineBasicBlock *SourceMBB,
74                           SmallVector<unsigned, 4> &Sources);
75   void addDest(unsigned DestReg, const DebugLoc &DL);
76   void replaceDef(unsigned OldDestReg, unsigned NewDestReg);
77   void deleteDef(unsigned DestReg);
78   void addSource(unsigned DestReg, unsigned SourceReg,
79                  MachineBasicBlock *SourceMBB);
80   void removeSource(unsigned DestReg, unsigned SourceReg,
81                     MachineBasicBlock *SourceMBB = nullptr);
82   bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
83                 unsigned &DestReg);
84   bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr);
85   unsigned getNumSources(unsigned DestReg);
86   void dump(MachineRegisterInfo *MRI);
87   void clear();
88 
89   typedef PHISourcesT::iterator source_iterator;
90   typedef PHILinearizeDestIterator dest_iterator;
91 
92   dest_iterator dests_begin();
93   dest_iterator dests_end();
94 
95   source_iterator sources_begin(unsigned Reg);
96   source_iterator sources_end(unsigned Reg);
97 };
98 
99 class PHILinearizeDestIterator {
100 private:
101   PHILinearize::PHIInfoT::iterator Iter;
102 
103 public:
104   unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); }
105   PHILinearizeDestIterator &operator++() {
106     ++Iter;
107     return *this;
108   }
109   bool operator==(const PHILinearizeDestIterator &I) const {
110     return I.Iter == Iter;
111   }
112   bool operator!=(const PHILinearizeDestIterator &I) const {
113     return I.Iter != Iter;
114   }
115 
116   PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {}
117 };
118 
119 unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) {
120   return Info->DestReg;
121 }
122 
123 void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info,
124                                         unsigned NewDef) {
125   Info->DestReg = NewDef;
126 }
127 
128 DebugLoc PHILinearize::phiInfoElementGetDebugLoc(PHIInfoElementT *Info) {
129   return Info->DL;
130 }
131 
132 PHILinearize::PHISourcesT &
133 PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) {
134   return Info->Sources;
135 }
136 
137 void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info,
138                                            unsigned SourceReg,
139                                            MachineBasicBlock *SourceMBB) {
140   // Assertion ensures we don't use the same SourceMBB for the
141   // sources, because we cannot have different registers with
142   // identical predecessors, but we can have the same register for
143   // multiple predecessors.
144   for (auto SI : phiInfoElementGetSources(Info)) {
145     assert((SI.second != SourceMBB || SourceReg == SI.first));
146   }
147 
148   phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB));
149 }
150 
151 void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info,
152                                               unsigned SourceReg,
153                                               MachineBasicBlock *SourceMBB) {
154   auto &Sources = phiInfoElementGetSources(Info);
155   SmallVector<PHISourceT, 4> ElimiatedSources;
156   for (auto SI : Sources) {
157     if (SI.first == SourceReg &&
158         (SI.second == nullptr || SI.second == SourceMBB)) {
159       ElimiatedSources.push_back(PHISourceT(SI.first, SI.second));
160     }
161   }
162 
163   for (auto &Source : ElimiatedSources) {
164     Sources.erase(Source);
165   }
166 }
167 
168 PHILinearize::PHIInfoElementT *
169 PHILinearize::findPHIInfoElement(unsigned DestReg) {
170   for (auto I : PHIInfo) {
171     if (phiInfoElementGetDest(I) == DestReg) {
172       return I;
173     }
174   }
175   return nullptr;
176 }
177 
178 PHILinearize::PHIInfoElementT *
179 PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg,
180                                            MachineBasicBlock *SourceMBB) {
181   for (auto I : PHIInfo) {
182     for (auto SI : phiInfoElementGetSources(I)) {
183       if (SI.first == SourceReg &&
184           (SI.second == nullptr || SI.second == SourceMBB)) {
185         return I;
186       }
187     }
188   }
189   return nullptr;
190 }
191 
192 bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB,
193                                       SmallVector<unsigned, 4> &Sources) {
194   bool FoundSource = false;
195   for (auto I : PHIInfo) {
196     for (auto SI : phiInfoElementGetSources(I)) {
197       if (SI.second == SourceMBB) {
198         FoundSource = true;
199         Sources.push_back(SI.first);
200       }
201     }
202   }
203   return FoundSource;
204 }
205 
206 void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) {
207   assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists");
208   PHISourcesT EmptySet;
209   PHIInfoElementT *NewElement = new PHIInfoElementT();
210   NewElement->DestReg = DestReg;
211   NewElement->DL = DL;
212   NewElement->Sources = EmptySet;
213   PHIInfo.insert(NewElement);
214 }
215 
216 void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) {
217   phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg);
218 }
219 
220 void PHILinearize::deleteDef(unsigned DestReg) {
221   PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg);
222   PHIInfo.erase(InfoElement);
223   delete InfoElement;
224 }
225 
226 void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg,
227                              MachineBasicBlock *SourceMBB) {
228   phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
229 }
230 
231 void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg,
232                                 MachineBasicBlock *SourceMBB) {
233   phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
234 }
235 
236 bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
237                             unsigned &DestReg) {
238   PHIInfoElementT *InfoElement =
239       findPHIInfoElementFromSource(SourceReg, SourceMBB);
240   if (InfoElement != nullptr) {
241     DestReg = phiInfoElementGetDest(InfoElement);
242     return true;
243   }
244   return false;
245 }
246 
247 bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) {
248   unsigned DestReg;
249   return findDest(Reg, SourceMBB, DestReg);
250 }
251 
252 unsigned PHILinearize::getNumSources(unsigned DestReg) {
253   return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size();
254 }
255 
256 void PHILinearize::dump(MachineRegisterInfo *MRI) {
257   const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
258   dbgs() << "=PHIInfo Start=\n";
259   for (auto PII : this->PHIInfo) {
260     PHIInfoElementT &Element = *PII;
261     dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI)
262            << " Sources: {";
263     for (auto &SI : Element.Sources) {
264       dbgs() << PrintReg(SI.first, TRI) << "(BB#"
265              << SI.second->getNumber() << "),";
266     }
267     dbgs() << "}\n";
268   }
269   dbgs() << "=PHIInfo End=\n";
270 }
271 
272 void PHILinearize::clear() { PHIInfo = PHIInfoT(); }
273 
274 PHILinearize::dest_iterator PHILinearize::dests_begin() {
275   return PHILinearizeDestIterator(PHIInfo.begin());
276 }
277 
278 PHILinearize::dest_iterator PHILinearize::dests_end() {
279   return PHILinearizeDestIterator(PHIInfo.end());
280 }
281 
282 PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) {
283   auto InfoElement = findPHIInfoElement(Reg);
284   return phiInfoElementGetSources(InfoElement).begin();
285 }
286 PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) {
287   auto InfoElement = findPHIInfoElement(Reg);
288   return phiInfoElementGetSources(InfoElement).end();
289 }
290 
291 class RegionMRT;
292 class MBBMRT;
293 
294 static unsigned getPHINumInputs(MachineInstr &PHI) {
295   assert(PHI.isPHI());
296   return (PHI.getNumOperands() - 1) / 2;
297 }
298 
299 static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) {
300   assert(PHI.isPHI());
301   return PHI.getOperand(Index * 2 + 2).getMBB();
302 }
303 
304 static void setPhiPred(MachineInstr &PHI, unsigned Index,
305                        MachineBasicBlock *NewPred) {
306   PHI.getOperand(Index * 2 + 2).setMBB(NewPred);
307 }
308 
309 static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) {
310   assert(PHI.isPHI());
311   return PHI.getOperand(Index * 2 + 1).getReg();
312 }
313 
314 static unsigned getPHIDestReg(MachineInstr &PHI) {
315   assert(PHI.isPHI());
316   return PHI.getOperand(0).getReg();
317 }
318 
319 class LinearizedRegion {
320 protected:
321   MachineBasicBlock *Entry;
322   // The exit block is part of the region, and is the last
323   // merge block before exiting the region.
324   MachineBasicBlock *Exit;
325   DenseSet<unsigned> LiveOuts;
326   SmallPtrSet<MachineBasicBlock *, 1> MBBs;
327   bool HasLoop;
328   LinearizedRegion *Parent;
329   RegionMRT *RMRT;
330 
331   void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
332                        MachineInstr *DefInstr, const MachineRegisterInfo *MRI,
333                        const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
334 
335   void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
336                              MachineInstr *DefInstr,
337                              const MachineRegisterInfo *MRI,
338                              const TargetRegisterInfo *TRI,
339                              PHILinearize &PHIInfo);
340 
341   void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
342                         const TargetRegisterInfo *TRI, PHILinearize &PHIInfo,
343                         RegionMRT *TopRegion);
344 
345   void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
346                      const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
347 
348   void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI,
349                      const TargetRegisterInfo *TRI, PHILinearize &PHIInfo,
350                      RegionMRT *TopRegion = nullptr);
351 
352 public:
353   void setRegionMRT(RegionMRT *Region) { RMRT = Region; }
354 
355   RegionMRT *getRegionMRT() { return RMRT; }
356 
357   void setParent(LinearizedRegion *P) { Parent = P; }
358 
359   LinearizedRegion *getParent() { return Parent; }
360 
361   void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr);
362 
363   void setBBSelectRegIn(unsigned Reg);
364 
365   unsigned getBBSelectRegIn();
366 
367   void setBBSelectRegOut(unsigned Reg, bool IsLiveOut);
368 
369   unsigned getBBSelectRegOut();
370 
371   void setHasLoop(bool Value);
372 
373   bool getHasLoop();
374 
375   void addLiveOut(unsigned VReg);
376 
377   void removeLiveOut(unsigned Reg);
378 
379   void replaceLiveOut(unsigned OldReg, unsigned NewReg);
380 
381   void replaceRegister(unsigned Register, unsigned NewRegister,
382                        MachineRegisterInfo *MRI, bool ReplaceInside,
383                        bool ReplaceOutside, bool IncludeLoopPHIs);
384 
385   void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister,
386                                    bool IncludeLoopPHIs,
387                                    MachineRegisterInfo *MRI);
388 
389   void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister,
390                                     bool IncludeLoopPHIs,
391                                     MachineRegisterInfo *MRI);
392 
393   DenseSet<unsigned> *getLiveOuts();
394 
395   void setEntry(MachineBasicBlock *NewEntry);
396 
397   MachineBasicBlock *getEntry();
398 
399   void setExit(MachineBasicBlock *NewExit);
400 
401   MachineBasicBlock *getExit();
402 
403   void addMBB(MachineBasicBlock *MBB);
404 
405   void addMBBs(LinearizedRegion *InnerRegion);
406 
407   bool contains(MachineBasicBlock *MBB);
408 
409   bool isLiveOut(unsigned Reg);
410 
411   bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI);
412 
413   void removeFalseRegisterKills(MachineRegisterInfo *MRI);
414 
415   void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI,
416                    const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
417 
418   LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
419                    const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
420 
421   LinearizedRegion();
422 
423   ~LinearizedRegion();
424 };
425 
426 class MRT {
427 protected:
428   RegionMRT *Parent;
429   unsigned BBSelectRegIn;
430   unsigned BBSelectRegOut;
431 
432 public:
433   unsigned getBBSelectRegIn() { return BBSelectRegIn; }
434 
435   unsigned getBBSelectRegOut() { return BBSelectRegOut; }
436 
437   void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; }
438 
439   void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; }
440 
441   virtual RegionMRT *getRegionMRT() { return nullptr; }
442 
443   virtual MBBMRT *getMBBMRT() { return nullptr; }
444 
445   bool isRegion() { return getRegionMRT() != nullptr; }
446 
447   bool isMBB() { return getMBBMRT() != nullptr; }
448 
449   bool isRoot() { return Parent == nullptr; }
450 
451   void setParent(RegionMRT *Region) { Parent = Region; }
452 
453   RegionMRT *getParent() { return Parent; }
454 
455   static MachineBasicBlock *
456   initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
457                 DenseMap<MachineRegion *, RegionMRT *> &RegionMap);
458 
459   static RegionMRT *buildMRT(MachineFunction &MF,
460                              const MachineRegionInfo *RegionInfo,
461                              const SIInstrInfo *TII,
462                              MachineRegisterInfo *MRI);
463 
464   virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0;
465 
466   void dumpDepth(int depth) {
467     for (int i = depth; i > 0; --i) {
468       dbgs() << "  ";
469     }
470   }
471 
472   virtual ~MRT() {}
473 };
474 
475 class MBBMRT : public MRT {
476   MachineBasicBlock *MBB;
477 
478 public:
479   virtual MBBMRT *getMBBMRT() { return this; }
480 
481   MachineBasicBlock *getMBB() { return MBB; }
482 
483   virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) {
484     dumpDepth(depth);
485     dbgs() << "MBB: " << getMBB()->getNumber();
486     dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI);
487     dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n";
488   }
489 
490   MBBMRT(MachineBasicBlock *BB) : MBB(BB) {
491     setParent(nullptr);
492     setBBSelectRegOut(0);
493     setBBSelectRegIn(0);
494   }
495 };
496 
497 class RegionMRT : public MRT {
498 protected:
499   MachineRegion *Region;
500   LinearizedRegion *LRegion;
501   MachineBasicBlock *Succ;
502 
503   SetVector<MRT *> Children;
504 
505 public:
506   virtual RegionMRT *getRegionMRT() { return this; }
507 
508   void setLinearizedRegion(LinearizedRegion *LinearizeRegion) {
509     LRegion = LinearizeRegion;
510   }
511 
512   LinearizedRegion *getLinearizedRegion() { return LRegion; }
513 
514   MachineRegion *getMachineRegion() { return Region; }
515 
516   unsigned getInnerOutputRegister() {
517     return (*(Children.begin()))->getBBSelectRegOut();
518   }
519 
520   void addChild(MRT *Tree) { Children.insert(Tree); }
521 
522   SetVector<MRT *> *getChildren() { return &Children; }
523 
524   virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) {
525     dumpDepth(depth);
526     dbgs() << "Region: " << (void *)Region;
527     dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI);
528     dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n";
529 
530     dumpDepth(depth);
531     if (getSucc())
532       dbgs() << "Succ: " << getSucc()->getNumber() << "\n";
533     else
534       dbgs() << "Succ: none \n";
535     for (auto MRTI : Children) {
536       MRTI->dump(TRI, depth + 1);
537     }
538   }
539 
540   MRT *getEntryTree() { return Children.back(); }
541 
542   MRT *getExitTree() { return Children.front(); }
543 
544   MachineBasicBlock *getEntry() {
545     MRT *Tree = Children.back();
546     return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry()
547                               : Tree->getMBBMRT()->getMBB();
548   }
549 
550   MachineBasicBlock *getExit() {
551     MRT *Tree = Children.front();
552     return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit()
553                               : Tree->getMBBMRT()->getMBB();
554   }
555 
556   void setSucc(MachineBasicBlock *MBB) { Succ = MBB; }
557 
558   MachineBasicBlock *getSucc() { return Succ; }
559 
560   bool contains(MachineBasicBlock *MBB) {
561     for (auto CI : Children) {
562       if (CI->isMBB()) {
563         if (MBB == CI->getMBBMRT()->getMBB()) {
564           return true;
565         }
566       } else {
567         if (CI->getRegionMRT()->contains(MBB)) {
568           return true;
569         } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr &&
570                    CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) {
571           return true;
572         }
573       }
574     }
575     return false;
576   }
577 
578   void replaceLiveOutReg(unsigned Register, unsigned NewRegister) {
579     LinearizedRegion *LRegion = getLinearizedRegion();
580     LRegion->replaceLiveOut(Register, NewRegister);
581     for (auto &CI : Children) {
582       if (CI->isRegion()) {
583         CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
584       }
585     }
586   }
587 
588   RegionMRT(MachineRegion *MachineRegion)
589       : Region(MachineRegion), LRegion(nullptr), Succ(nullptr) {
590     setParent(nullptr);
591     setBBSelectRegOut(0);
592     setBBSelectRegIn(0);
593   }
594 
595   virtual ~RegionMRT() {
596     if (LRegion) {
597       delete LRegion;
598     }
599 
600     for (auto CI : Children) {
601       delete &(*CI);
602     }
603   }
604 };
605 
606 static unsigned createBBSelectReg(const SIInstrInfo *TII,
607                                   MachineRegisterInfo *MRI) {
608   return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32));
609 }
610 
611 MachineBasicBlock *
612 MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
613                    DenseMap<MachineRegion *, RegionMRT *> &RegionMap) {
614   for (auto &MFI : MF) {
615     MachineBasicBlock *ExitMBB = &MFI;
616     if (ExitMBB->succ_size() == 0) {
617       return ExitMBB;
618     }
619   }
620   llvm_unreachable("CFG has no exit block");
621   return nullptr;
622 }
623 
624 RegionMRT *MRT::buildMRT(MachineFunction &MF,
625                          const MachineRegionInfo *RegionInfo,
626                          const SIInstrInfo *TII, MachineRegisterInfo *MRI) {
627   SmallPtrSet<MachineRegion *, 4> PlacedRegions;
628   DenseMap<MachineRegion *, RegionMRT *> RegionMap;
629   MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion();
630   RegionMRT *Result = new RegionMRT(TopLevelRegion);
631   RegionMap[TopLevelRegion] = Result;
632 
633   // Insert the exit block first, we need it to be the merge node
634   // for the top level region.
635   MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap);
636 
637   unsigned BBSelectRegIn = createBBSelectReg(TII, MRI);
638   MBBMRT *ExitMRT = new MBBMRT(Exit);
639   RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT);
640   ExitMRT->setBBSelectRegIn(BBSelectRegIn);
641 
642   for (auto MBBI : post_order(&(MF.front()))) {
643     MachineBasicBlock *MBB = &(*MBBI);
644 
645     // Skip Exit since we already added it
646     if (MBB == Exit) {
647       continue;
648     }
649 
650     DEBUG(dbgs() << "Visiting BB#" << MBB->getNumber() << "\n");
651     MBBMRT *NewMBB = new MBBMRT(MBB);
652     MachineRegion *Region = RegionInfo->getRegionFor(MBB);
653 
654     // Ensure we have the MRT region
655     if (RegionMap.count(Region) == 0) {
656       RegionMRT *NewMRTRegion = new RegionMRT(Region);
657       RegionMap[Region] = NewMRTRegion;
658 
659       // Ensure all parents are in the RegionMap
660       MachineRegion *Parent = Region->getParent();
661       while (RegionMap.count(Parent) == 0) {
662         RegionMRT *NewMRTParent = new RegionMRT(Parent);
663         NewMRTParent->addChild(NewMRTRegion);
664         NewMRTRegion->setParent(NewMRTParent);
665         RegionMap[Parent] = NewMRTParent;
666         NewMRTRegion = NewMRTParent;
667         Parent = Parent->getParent();
668       }
669       RegionMap[Parent]->addChild(NewMRTRegion);
670       NewMRTRegion->setParent(RegionMap[Parent]);
671     }
672 
673     // Add MBB to Region MRT
674     RegionMap[Region]->addChild(NewMBB);
675     NewMBB->setParent(RegionMap[Region]);
676     RegionMap[Region]->setSucc(Region->getExit());
677   }
678   return Result;
679 }
680 
681 void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
682                                        MachineInstr *DefInstr,
683                                        const MachineRegisterInfo *MRI,
684                                        const TargetRegisterInfo *TRI,
685                                        PHILinearize &PHIInfo) {
686   if (TRI->isVirtualRegister(Reg)) {
687     DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n");
688     // If this is a source register to a PHI we are chaining, it
689     // must be live out.
690     if (PHIInfo.isSource(Reg)) {
691       DEBUG(dbgs() << "Add LiveOut (PHI): " << PrintReg(Reg, TRI) << "\n");
692       addLiveOut(Reg);
693     } else {
694       // If this is live out of the MBB
695       for (auto &UI : MRI->use_operands(Reg)) {
696         if (UI.getParent()->getParent() != MBB) {
697           DEBUG(dbgs() << "Add LiveOut (MBB BB#" << MBB->getNumber()
698                        << "): " << PrintReg(Reg, TRI) << "\n");
699           addLiveOut(Reg);
700         } else {
701           // If the use is in the same MBB we have to make sure
702           // it is after the def, otherwise it is live out in a loop
703           MachineInstr *UseInstr = UI.getParent();
704           for (MachineBasicBlock::instr_iterator
705                    MII = UseInstr->getIterator(),
706                    MIE = UseInstr->getParent()->instr_end();
707                MII != MIE; ++MII) {
708             if ((&(*MII)) == DefInstr) {
709               DEBUG(dbgs() << "Add LiveOut (Loop): " << PrintReg(Reg, TRI)
710                            << "\n");
711               addLiveOut(Reg);
712             }
713           }
714         }
715       }
716     }
717   }
718 }
719 
720 void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
721                                              MachineInstr *DefInstr,
722                                              const MachineRegisterInfo *MRI,
723                                              const TargetRegisterInfo *TRI,
724                                              PHILinearize &PHIInfo) {
725   if (TRI->isVirtualRegister(Reg)) {
726     DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n");
727     for (auto &UI : MRI->use_operands(Reg)) {
728       if (!Region->contains(UI.getParent()->getParent())) {
729         DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region
730                      << "): " << PrintReg(Reg, TRI) << "\n");
731         addLiveOut(Reg);
732       }
733     }
734   }
735 }
736 
737 void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
738                                      const MachineRegisterInfo *MRI,
739                                      const TargetRegisterInfo *TRI,
740                                      PHILinearize &PHIInfo) {
741   DEBUG(dbgs() << "-Store Live Outs Begin (BB#" << MBB->getNumber() << ")-\n");
742   for (auto &II : *MBB) {
743     for (auto &RI : II.defs()) {
744       storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo);
745     }
746     for (auto &IRI : II.implicit_operands()) {
747       if (IRI.isDef()) {
748         storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo);
749       }
750     }
751   }
752 
753   // If we have a successor with a PHI, source coming from this MBB we have to
754   // add the register as live out
755   for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
756                                         E = MBB->succ_end();
757        SI != E; ++SI) {
758     for (auto &II : *(*SI)) {
759       if (II.isPHI()) {
760         MachineInstr &PHI = II;
761         int numPreds = getPHINumInputs(PHI);
762         for (int i = 0; i < numPreds; ++i) {
763           if (getPHIPred(PHI, i) == MBB) {
764             unsigned PHIReg = getPHISourceReg(PHI, i);
765             DEBUG(dbgs() << "Add LiveOut (PhiSource BB#" << MBB->getNumber()
766                          << " -> BB#" << (*SI)->getNumber()
767                          << "): " << PrintReg(PHIReg, TRI) << "\n");
768             addLiveOut(PHIReg);
769           }
770         }
771       }
772     }
773   }
774 
775   DEBUG(dbgs() << "-Store Live Outs Endn-\n");
776 }
777 
778 void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB,
779                                         const MachineRegisterInfo *MRI,
780                                         const TargetRegisterInfo *TRI,
781                                         PHILinearize &PHIInfo,
782                                         RegionMRT *TopRegion) {
783   for (auto &II : *MBB) {
784     for (auto &RI : II.defs()) {
785       storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI,
786                             PHIInfo);
787     }
788     for (auto &IRI : II.implicit_operands()) {
789       if (IRI.isDef()) {
790         storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI,
791                               TRI, PHIInfo);
792       }
793     }
794   }
795 }
796 
797 void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
798                                      const MachineRegisterInfo *MRI,
799                                      const TargetRegisterInfo *TRI,
800                                      PHILinearize &PHIInfo,
801                                      RegionMRT *CurrentTopRegion) {
802   MachineBasicBlock *Exit = Region->getSucc();
803 
804   RegionMRT *TopRegion =
805       CurrentTopRegion == nullptr ? Region : CurrentTopRegion;
806 
807   // Check if exit is end of function, if so, no live outs.
808   if (Exit == nullptr)
809     return;
810 
811   auto Children = Region->getChildren();
812   for (auto CI : *Children) {
813     if (CI->isMBB()) {
814       auto MBB = CI->getMBBMRT()->getMBB();
815       storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion);
816     } else {
817       LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion();
818       // We should be limited to only store registers that are live out from the
819       // lineaized region
820       for (auto MBBI : SubRegion->MBBs) {
821         storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion);
822       }
823     }
824   }
825 
826   if (CurrentTopRegion == nullptr) {
827     auto Succ = Region->getSucc();
828     for (auto &II : *Succ) {
829       if (II.isPHI()) {
830         MachineInstr &PHI = II;
831         int numPreds = getPHINumInputs(PHI);
832         for (int i = 0; i < numPreds; ++i) {
833           if (Region->contains(getPHIPred(PHI, i))) {
834             unsigned PHIReg = getPHISourceReg(PHI, i);
835             DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region
836                          << "): " << PrintReg(PHIReg, TRI) << "\n");
837             addLiveOut(PHIReg);
838           }
839         }
840       }
841     }
842   }
843 }
844 
845 void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
846   OS << "Linearized Region {";
847   bool IsFirst = true;
848   for (const auto &MBB : MBBs) {
849     if (IsFirst) {
850       IsFirst = false;
851     } else {
852       OS << " ,";
853     }
854     OS << MBB->getNumber();
855   }
856   OS << "} (" << Entry->getNumber() << ", "
857      << (Exit == nullptr ? -1 : Exit->getNumber())
858      << "): In:" << PrintReg(getBBSelectRegIn(), TRI)
859      << " Out:" << PrintReg(getBBSelectRegOut(), TRI) << " {";
860   for (auto &LI : LiveOuts) {
861     OS << PrintReg(LI, TRI) << " ";
862   }
863   OS << "} \n";
864 }
865 
866 unsigned LinearizedRegion::getBBSelectRegIn() {
867   return getRegionMRT()->getBBSelectRegIn();
868 }
869 
870 unsigned LinearizedRegion::getBBSelectRegOut() {
871   return getRegionMRT()->getBBSelectRegOut();
872 }
873 
874 void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; }
875 
876 bool LinearizedRegion::getHasLoop() { return HasLoop; }
877 
878 void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); }
879 
880 void LinearizedRegion::removeLiveOut(unsigned Reg) {
881   if (isLiveOut(Reg))
882     LiveOuts.erase(Reg);
883 }
884 
885 void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) {
886   if (isLiveOut(OldReg)) {
887     removeLiveOut(OldReg);
888     addLiveOut(NewReg);
889   }
890 }
891 
892 void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister,
893                                        MachineRegisterInfo *MRI,
894                                        bool ReplaceInside, bool ReplaceOutside,
895                                        bool IncludeLoopPHI) {
896   assert(Register != NewRegister && "Cannot replace a reg with itself");
897 
898   DEBUG(dbgs() << "Pepareing to replace register (region): "
899                << PrintReg(Register, MRI->getTargetRegisterInfo()) << " with "
900                << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
901 
902   // If we are replacing outside, we also need to update the LiveOuts
903   if (ReplaceOutside &&
904       (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) {
905     LinearizedRegion *Current = this;
906     while (Current != nullptr && Current->getEntry() != nullptr) {
907       DEBUG(dbgs() << "Region before register replace\n");
908       DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
909       Current->replaceLiveOut(Register, NewRegister);
910       DEBUG(dbgs() << "Region after register replace\n");
911       DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
912       Current = Current->getParent();
913     }
914   }
915 
916   for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
917                                          E = MRI->reg_end();
918        I != E;) {
919     MachineOperand &O = *I;
920     ++I;
921 
922     // We don't rewrite defs.
923     if (O.isDef())
924       continue;
925 
926     bool IsInside = contains(O.getParent()->getParent());
927     bool IsLoopPHI = IsInside && (O.getParent()->isPHI() &&
928                                   O.getParent()->getParent() == getEntry());
929     bool ShouldReplace = (IsInside && ReplaceInside) ||
930                          (!IsInside && ReplaceOutside) ||
931                          (IncludeLoopPHI && IsLoopPHI);
932     if (ShouldReplace) {
933 
934       if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
935         DEBUG(dbgs() << "Trying to substitute physical register: "
936                      << PrintReg(NewRegister, MRI->getTargetRegisterInfo())
937                      << "\n");
938         llvm_unreachable("Cannot substitute physical registers");
939       } else {
940         DEBUG(dbgs() << "Replacing register (region): "
941                      << PrintReg(Register, MRI->getTargetRegisterInfo())
942                      << " with "
943                      << PrintReg(NewRegister, MRI->getTargetRegisterInfo())
944                      << "\n");
945         O.setReg(NewRegister);
946       }
947     }
948   }
949 }
950 
951 void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register,
952                                                    unsigned NewRegister,
953                                                    bool IncludeLoopPHIs,
954                                                    MachineRegisterInfo *MRI) {
955   replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs);
956 }
957 
958 void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register,
959                                                     unsigned NewRegister,
960                                                     bool IncludeLoopPHIs,
961                                                     MachineRegisterInfo *MRI) {
962   replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs);
963 }
964 
965 DenseSet<unsigned> *LinearizedRegion::getLiveOuts() { return &LiveOuts; }
966 
967 void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) {
968   Entry = NewEntry;
969 }
970 
971 MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; }
972 
973 void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; }
974 
975 MachineBasicBlock *LinearizedRegion::getExit() { return Exit; }
976 
977 void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); }
978 
979 void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) {
980   for (const auto &MBB : InnerRegion->MBBs) {
981     addMBB(MBB);
982   }
983 }
984 
985 bool LinearizedRegion::contains(MachineBasicBlock *MBB) {
986   return MBBs.count(MBB) == 1;
987 }
988 
989 bool LinearizedRegion::isLiveOut(unsigned Reg) {
990   return LiveOuts.count(Reg) == 1;
991 }
992 
993 bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) {
994   return MRI->def_begin(Reg) == MRI->def_end();
995 }
996 
997 // After the code has been structurized, what was flagged as kills
998 // before are no longer register kills.
999 void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) {
1000   const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
1001   for (auto MBBI : MBBs) {
1002     MachineBasicBlock *MBB = MBBI;
1003     for (auto &II : *MBB) {
1004       for (auto &RI : II.uses()) {
1005         if (RI.isReg()) {
1006           unsigned Reg = RI.getReg();
1007           if (TRI->isVirtualRegister(Reg)) {
1008             if (hasNoDef(Reg, MRI))
1009               continue;
1010             if (!MRI->hasOneDef(Reg)) {
1011               DEBUG(this->getEntry()->getParent()->dump());
1012               DEBUG(dbgs() << PrintReg(Reg, TRI) << "\n");
1013             }
1014 
1015             if (MRI->def_begin(Reg) == MRI->def_end()) {
1016               DEBUG(dbgs() << "Register "
1017                            << PrintReg(Reg, MRI->getTargetRegisterInfo())
1018                            << " has NO defs\n");
1019             } else if (!MRI->hasOneDef(Reg)) {
1020               DEBUG(dbgs() << "Register "
1021                            << PrintReg(Reg, MRI->getTargetRegisterInfo())
1022                            << " has multiple defs\n");
1023             }
1024 
1025             assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
1026             MachineOperand *Def = &(*(MRI->def_begin(Reg)));
1027             MachineOperand *UseOperand = &(RI);
1028             bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB;
1029             if (UseIsOutsideDefMBB && UseOperand->isKill()) {
1030               DEBUG(dbgs() << "Removing kill flag on register: "
1031                            << PrintReg(Reg, TRI) << "\n");
1032               UseOperand->setIsKill(false);
1033             }
1034           }
1035         }
1036       }
1037     }
1038   }
1039 }
1040 
1041 void LinearizedRegion::initLiveOut(RegionMRT *Region,
1042                                    const MachineRegisterInfo *MRI,
1043                                    const TargetRegisterInfo *TRI,
1044                                    PHILinearize &PHIInfo) {
1045   storeLiveOuts(Region, MRI, TRI, PHIInfo);
1046 }
1047 
1048 LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB,
1049                                    const MachineRegisterInfo *MRI,
1050                                    const TargetRegisterInfo *TRI,
1051                                    PHILinearize &PHIInfo) {
1052   setEntry(MBB);
1053   setExit(MBB);
1054   storeLiveOuts(MBB, MRI, TRI, PHIInfo);
1055   MBBs.insert(MBB);
1056   Parent = nullptr;
1057 }
1058 
1059 LinearizedRegion::LinearizedRegion() {
1060   setEntry(nullptr);
1061   setExit(nullptr);
1062   Parent = nullptr;
1063 }
1064 
1065 LinearizedRegion::~LinearizedRegion() {}
1066 
1067 class AMDGPUMachineCFGStructurizer : public MachineFunctionPass {
1068 private:
1069   const MachineRegionInfo *Regions;
1070   const SIInstrInfo *TII;
1071   const TargetRegisterInfo *TRI;
1072   MachineRegisterInfo *MRI;
1073   unsigned BBSelectRegister;
1074   PHILinearize PHIInfo;
1075   DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap;
1076 
1077   void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI,
1078                            SmallVector<unsigned, 2> &RegionIndices);
1079   void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI,
1080                            SmallVector<unsigned, 2> &RegionIndices);
1081   void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI,
1082                               SmallVector<unsigned, 2> &PHINonRegionIndices);
1083 
1084   void storePHILinearizationInfoDest(
1085       unsigned LDestReg, MachineInstr &PHI,
1086       SmallVector<unsigned, 2> *RegionIndices = nullptr);
1087 
1088   unsigned storePHILinearizationInfo(MachineInstr &PHI,
1089                                      SmallVector<unsigned, 2> *RegionIndices);
1090 
1091   void extractKilledPHIs(MachineBasicBlock *MBB);
1092 
1093   bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices,
1094                  unsigned *ReplaceReg);
1095 
1096   bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1097                  MachineBasicBlock *SourceMBB,
1098                  SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg);
1099 
1100   void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1101                   MachineBasicBlock *LastMerge,
1102                   SmallVector<unsigned, 2> &PHIRegionIndices);
1103   void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1104                        MachineBasicBlock *IfMBB,
1105                        SmallVector<unsigned, 2> &PHIRegionIndices);
1106   void replaceLiveOutRegs(MachineInstr &PHI,
1107                           SmallVector<unsigned, 2> &PHIRegionIndices,
1108                           unsigned CombinedSourceReg,
1109                           LinearizedRegion *LRegion);
1110   void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge,
1111                             MachineInstr &PHI, LinearizedRegion *LRegion);
1112 
1113   void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge,
1114                              LinearizedRegion *LRegion);
1115   void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB,
1116                              MachineInstr &PHI);
1117   void rewriteRegionEntryPHIs(LinearizedRegion *Region,
1118                               MachineBasicBlock *IfMBB);
1119 
1120   bool regionIsSimpleIf(RegionMRT *Region);
1121 
1122   void transformSimpleIfRegion(RegionMRT *Region);
1123 
1124   void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II);
1125 
1126   void insertUnconditionalBranch(MachineBasicBlock *MBB,
1127                                  MachineBasicBlock *Dest,
1128                                  const DebugLoc &DL = DebugLoc());
1129 
1130   MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region);
1131 
1132   void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1133                       MachineBasicBlock *MergeBB, unsigned DestRegister,
1134                       unsigned IfSourceRegister, unsigned CodeSourceRegister,
1135                       bool IsUndefIfSource = false);
1136 
1137   MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB,
1138                                    MachineBasicBlock *CodeBBStart,
1139                                    MachineBasicBlock *CodeBBEnd,
1140                                    MachineBasicBlock *SelectBB, unsigned IfReg,
1141                                    bool InheritPreds);
1142 
1143   void prunePHIInfo(MachineBasicBlock *MBB);
1144   void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg);
1145 
1146   void createEntryPHIs(LinearizedRegion *CurrentRegion);
1147   void resolvePHIInfos(MachineBasicBlock *FunctionEntry);
1148 
1149   void replaceRegisterWith(unsigned Register, unsigned NewRegister);
1150 
1151   MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB,
1152                                     MachineBasicBlock *CodeBB,
1153                                     LinearizedRegion *LRegion,
1154                                     unsigned BBSelectRegIn,
1155                                     unsigned BBSelectRegOut);
1156 
1157   MachineBasicBlock *
1158   createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion,
1159                  LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
1160                  unsigned BBSelectRegIn, unsigned BBSelectRegOut);
1161   void ensureCondIsNotKilled(SmallVector<MachineOperand, 1> Cond);
1162 
1163   void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
1164                                MachineBasicBlock *MergeBB,
1165                                unsigned BBSelectReg);
1166 
1167   MachineInstr *getDefInstr(unsigned Reg);
1168   void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1169                         MachineBasicBlock *MergeBB,
1170                         LinearizedRegion *InnerRegion, unsigned DestReg,
1171                         unsigned SourceReg);
1172   bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion,
1173                    unsigned Register);
1174   void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1175                           MachineBasicBlock *MergeBB,
1176                           LinearizedRegion *InnerRegion,
1177                           LinearizedRegion *LRegion);
1178 
1179   void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry,
1180                     MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion);
1181   void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc,
1182                      LinearizedRegion *LRegion);
1183 
1184   MachineBasicBlock *splitExit(LinearizedRegion *LRegion);
1185 
1186   MachineBasicBlock *splitEntry(LinearizedRegion *LRegion);
1187 
1188   LinearizedRegion *initLinearizedRegion(RegionMRT *Region);
1189 
1190   bool structurizeComplexRegion(RegionMRT *Region);
1191 
1192   bool structurizeRegion(RegionMRT *Region);
1193 
1194   bool structurizeRegions(RegionMRT *Region, bool isTopRegion);
1195 
1196 public:
1197   static char ID;
1198 
1199   void getAnalysisUsage(AnalysisUsage &AU) const override {
1200     AU.addRequired<MachineRegionInfoPass>();
1201     MachineFunctionPass::getAnalysisUsage(AU);
1202   }
1203 
1204     AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) {
1205       initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry());
1206     }
1207 
1208   void initFallthroughMap(MachineFunction &MF);
1209 
1210   void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut);
1211 
1212   unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg,
1213                                      MachineRegisterInfo *MRI,
1214                                      const SIInstrInfo *TII);
1215 
1216   RegionMRT *RMRT;
1217   void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; }
1218 
1219   RegionMRT *getRegionMRT() { return RMRT; }
1220 
1221   bool runOnMachineFunction(MachineFunction &MF) override;
1222 };
1223 }
1224 
1225 char AMDGPUMachineCFGStructurizer::ID = 0;
1226 
1227 bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) {
1228   MachineBasicBlock *Entry = Region->getEntry();
1229   MachineBasicBlock *Succ = Region->getSucc();
1230   bool FoundBypass = false;
1231   bool FoundIf = false;
1232 
1233   if (Entry->succ_size() != 2) {
1234     return false;
1235   }
1236 
1237   for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(),
1238                                               E = Entry->succ_end();
1239        SI != E; ++SI) {
1240     MachineBasicBlock *Current = *SI;
1241 
1242     if (Current == Succ) {
1243       FoundBypass = true;
1244     } else if ((Current->succ_size() == 1) &&
1245                *(Current->succ_begin()) == Succ) {
1246       FoundIf = true;
1247     }
1248   }
1249 
1250   return FoundIf && FoundBypass;
1251 }
1252 
1253 void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) {
1254   MachineBasicBlock *Entry = Region->getEntry();
1255   MachineBasicBlock *Exit = Region->getExit();
1256   TII->convertNonUniformIfRegion(Entry, Exit);
1257 }
1258 
1259 static void fixMBBTerminator(MachineBasicBlock *MBB) {
1260 
1261   if (MBB->succ_size() == 1) {
1262     auto *Succ = *(MBB->succ_begin());
1263     for (auto &TI : MBB->terminators()) {
1264       for (auto &UI : TI.uses()) {
1265         if (UI.isMBB() && UI.getMBB() != Succ) {
1266           UI.setMBB(Succ);
1267         }
1268       }
1269     }
1270   }
1271 }
1272 
1273 static void fixRegionTerminator(RegionMRT *Region) {
1274   MachineBasicBlock *InternalSucc = nullptr;
1275   MachineBasicBlock *ExternalSucc = nullptr;
1276   LinearizedRegion *LRegion = Region->getLinearizedRegion();
1277   auto Exit = LRegion->getExit();
1278 
1279   SmallPtrSet<MachineBasicBlock *, 2> Successors;
1280   for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(),
1281                                               SE = Exit->succ_end();
1282        SI != SE; ++SI) {
1283     MachineBasicBlock *Succ = *SI;
1284     if (LRegion->contains(Succ)) {
1285       // Do not allow re-assign
1286       assert(InternalSucc == nullptr);
1287       InternalSucc = Succ;
1288     } else {
1289       // Do not allow re-assign
1290       assert(ExternalSucc == nullptr);
1291       ExternalSucc = Succ;
1292     }
1293   }
1294 
1295   for (auto &TI : Exit->terminators()) {
1296     for (auto &UI : TI.uses()) {
1297       if (UI.isMBB()) {
1298         auto Target = UI.getMBB();
1299         if (Target != InternalSucc && Target != ExternalSucc) {
1300           UI.setMBB(ExternalSucc);
1301         }
1302       }
1303     }
1304   }
1305 }
1306 
1307 // If a region region is just a sequence of regions (and the exit
1308 // block in the case of the top level region), we can simply skip
1309 // linearizing it, because it is already linear
1310 bool regionIsSequence(RegionMRT *Region) {
1311   auto Children = Region->getChildren();
1312   for (auto CI : *Children) {
1313     if (!CI->isRegion()) {
1314       if (CI->getMBBMRT()->getMBB()->succ_size() > 1) {
1315         return false;
1316       }
1317     }
1318   }
1319   return true;
1320 }
1321 
1322 void fixupRegionExits(RegionMRT *Region) {
1323   auto Children = Region->getChildren();
1324   for (auto CI : *Children) {
1325     if (!CI->isRegion()) {
1326       fixMBBTerminator(CI->getMBBMRT()->getMBB());
1327     } else {
1328       fixRegionTerminator(CI->getRegionMRT());
1329     }
1330   }
1331 }
1332 
1333 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
1334     RegionMRT *Region, MachineInstr &PHI,
1335     SmallVector<unsigned, 2> &PHIRegionIndices) {
1336   unsigned NumInputs = getPHINumInputs(PHI);
1337   for (unsigned i = 0; i < NumInputs; ++i) {
1338     MachineBasicBlock *Pred = getPHIPred(PHI, i);
1339     if (Region->contains(Pred)) {
1340       PHIRegionIndices.push_back(i);
1341     }
1342   }
1343 }
1344 
1345 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
1346     LinearizedRegion *Region, MachineInstr &PHI,
1347     SmallVector<unsigned, 2> &PHIRegionIndices) {
1348   unsigned NumInputs = getPHINumInputs(PHI);
1349   for (unsigned i = 0; i < NumInputs; ++i) {
1350     MachineBasicBlock *Pred = getPHIPred(PHI, i);
1351     if (Region->contains(Pred)) {
1352       PHIRegionIndices.push_back(i);
1353     }
1354   }
1355 }
1356 
1357 void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices(
1358     LinearizedRegion *Region, MachineInstr &PHI,
1359     SmallVector<unsigned, 2> &PHINonRegionIndices) {
1360   unsigned NumInputs = getPHINumInputs(PHI);
1361   for (unsigned i = 0; i < NumInputs; ++i) {
1362     MachineBasicBlock *Pred = getPHIPred(PHI, i);
1363     if (!Region->contains(Pred)) {
1364       PHINonRegionIndices.push_back(i);
1365     }
1366   }
1367 }
1368 
1369 void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest(
1370     unsigned LDestReg, MachineInstr &PHI,
1371     SmallVector<unsigned, 2> *RegionIndices) {
1372   if (RegionIndices) {
1373     for (auto i : *RegionIndices) {
1374       PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
1375     }
1376   } else {
1377     unsigned NumInputs = getPHINumInputs(PHI);
1378     for (unsigned i = 0; i < NumInputs; ++i) {
1379       PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
1380     }
1381   }
1382 }
1383 
1384 unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo(
1385     MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) {
1386   unsigned DestReg = getPHIDestReg(PHI);
1387   unsigned LinearizeDestReg =
1388       MRI->createVirtualRegister(MRI->getRegClass(DestReg));
1389   PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc());
1390   storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices);
1391   return LinearizeDestReg;
1392 }
1393 
1394 void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
1395   // We need to create a new chain for the killed phi, but there is no
1396   // need to do the renaming outside or inside the block.
1397   SmallPtrSet<MachineInstr *, 2> PHIs;
1398   for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
1399                                          E = MBB->instr_end();
1400        I != E; ++I) {
1401     MachineInstr &Instr = *I;
1402     if (Instr.isPHI()) {
1403       unsigned PHIDestReg = getPHIDestReg(Instr);
1404       DEBUG(dbgs() << "Extractking killed phi:\n");
1405       DEBUG(Instr.dump());
1406       PHIs.insert(&Instr);
1407       PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
1408       storePHILinearizationInfoDest(PHIDestReg, Instr);
1409     }
1410   }
1411 
1412   for (auto PI : PHIs) {
1413     PI->eraseFromParent();
1414   }
1415 }
1416 
1417 static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices,
1418                              unsigned Index) {
1419   for (auto i : PHIRegionIndices) {
1420     if (i == Index)
1421       return true;
1422   }
1423   return false;
1424 }
1425 
1426 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
1427                                        SmallVector<unsigned, 2> &PHIIndices,
1428                                        unsigned *ReplaceReg) {
1429   return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg);
1430 }
1431 
1432 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
1433                                        unsigned CombinedSourceReg,
1434                                        MachineBasicBlock *SourceMBB,
1435                                        SmallVector<unsigned, 2> &PHIIndices,
1436                                        unsigned *ReplaceReg) {
1437   DEBUG(dbgs() << "Shrink PHI: ");
1438   DEBUG(PHI.dump());
1439   DEBUG(dbgs() << " to " << PrintReg(getPHIDestReg(PHI), TRI)
1440                << "<def> = PHI(");
1441 
1442   bool Replaced = false;
1443   unsigned NumInputs = getPHINumInputs(PHI);
1444   int SingleExternalEntryIndex = -1;
1445   for (unsigned i = 0; i < NumInputs; ++i) {
1446     if (!isPHIRegionIndex(PHIIndices, i)) {
1447       if (SingleExternalEntryIndex == -1) {
1448         // Single entry
1449         SingleExternalEntryIndex = i;
1450       } else {
1451         // Multiple entries
1452         SingleExternalEntryIndex = -2;
1453       }
1454     }
1455   }
1456 
1457   if (SingleExternalEntryIndex > -1) {
1458     *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex);
1459     // We should not rewrite the code, we should only pick up the single value
1460     // that represents the shrunk PHI.
1461     Replaced = true;
1462   } else {
1463     MachineBasicBlock *MBB = PHI.getParent();
1464     MachineInstrBuilder MIB =
1465         BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1466                 getPHIDestReg(PHI));
1467     if (SourceMBB) {
1468       MIB.addReg(CombinedSourceReg);
1469       MIB.addMBB(SourceMBB);
1470       DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#"
1471                    << SourceMBB->getNumber());
1472     }
1473 
1474     for (unsigned i = 0; i < NumInputs; ++i) {
1475       if (isPHIRegionIndex(PHIIndices, i)) {
1476         continue;
1477       }
1478       unsigned SourceReg = getPHISourceReg(PHI, i);
1479       MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1480       MIB.addReg(SourceReg);
1481       MIB.addMBB(SourcePred);
1482       DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#"
1483                    << SourcePred->getNumber());
1484     }
1485     DEBUG(dbgs() << ")\n");
1486   }
1487   PHI.eraseFromParent();
1488   return Replaced;
1489 }
1490 
1491 void AMDGPUMachineCFGStructurizer::replacePHI(
1492     MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge,
1493     SmallVector<unsigned, 2> &PHIRegionIndices) {
1494   DEBUG(dbgs() << "Replace PHI: ");
1495   DEBUG(PHI.dump());
1496   DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI)
1497                << "<def> = PHI(");
1498 
1499   bool HasExternalEdge = false;
1500   unsigned NumInputs = getPHINumInputs(PHI);
1501   for (unsigned i = 0; i < NumInputs; ++i) {
1502     if (!isPHIRegionIndex(PHIRegionIndices, i)) {
1503       HasExternalEdge = true;
1504     }
1505   }
1506 
1507   if (HasExternalEdge) {
1508     MachineBasicBlock *MBB = PHI.getParent();
1509     MachineInstrBuilder MIB =
1510         BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1511                 getPHIDestReg(PHI));
1512     MIB.addReg(CombinedSourceReg);
1513     MIB.addMBB(LastMerge);
1514     DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#"
1515                  << LastMerge->getNumber());
1516     for (unsigned i = 0; i < NumInputs; ++i) {
1517       if (isPHIRegionIndex(PHIRegionIndices, i)) {
1518         continue;
1519       }
1520       unsigned SourceReg = getPHISourceReg(PHI, i);
1521       MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1522       MIB.addReg(SourceReg);
1523       MIB.addMBB(SourcePred);
1524       DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#"
1525                    << SourcePred->getNumber());
1526     }
1527     DEBUG(dbgs() << ")\n");
1528   } else {
1529     replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg);
1530   }
1531   PHI.eraseFromParent();
1532 }
1533 
1534 void AMDGPUMachineCFGStructurizer::replaceEntryPHI(
1535     MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB,
1536     SmallVector<unsigned, 2> &PHIRegionIndices) {
1537 
1538   DEBUG(dbgs() << "Replace entry PHI: ");
1539   DEBUG(PHI.dump());
1540   DEBUG(dbgs() << " with ");
1541 
1542   unsigned NumInputs = getPHINumInputs(PHI);
1543   unsigned NumNonRegionInputs = NumInputs;
1544   for (unsigned i = 0; i < NumInputs; ++i) {
1545     if (isPHIRegionIndex(PHIRegionIndices, i)) {
1546       NumNonRegionInputs--;
1547     }
1548   }
1549 
1550   if (NumNonRegionInputs == 0) {
1551     auto DestReg = getPHIDestReg(PHI);
1552     replaceRegisterWith(DestReg, CombinedSourceReg);
1553     DEBUG(dbgs() << " register " << PrintReg(CombinedSourceReg, TRI) << "\n");
1554     PHI.eraseFromParent();
1555   } else {
1556     DEBUG(dbgs() << PrintReg(getPHIDestReg(PHI), TRI) << "<def> = PHI(");
1557     MachineBasicBlock *MBB = PHI.getParent();
1558     MachineInstrBuilder MIB =
1559         BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1560                 getPHIDestReg(PHI));
1561     MIB.addReg(CombinedSourceReg);
1562     MIB.addMBB(IfMBB);
1563     DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#"
1564                  << IfMBB->getNumber());
1565     unsigned NumInputs = getPHINumInputs(PHI);
1566     for (unsigned i = 0; i < NumInputs; ++i) {
1567       if (isPHIRegionIndex(PHIRegionIndices, i)) {
1568         continue;
1569       }
1570       unsigned SourceReg = getPHISourceReg(PHI, i);
1571       MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1572       MIB.addReg(SourceReg);
1573       MIB.addMBB(SourcePred);
1574       DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#"
1575                    << SourcePred->getNumber());
1576     }
1577     DEBUG(dbgs() << ")\n");
1578     PHI.eraseFromParent();
1579   }
1580 }
1581 
1582 void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs(
1583     MachineInstr &PHI, SmallVector<unsigned, 2> &PHIRegionIndices,
1584     unsigned CombinedSourceReg, LinearizedRegion *LRegion) {
1585   bool WasLiveOut = false;
1586   for (auto PII : PHIRegionIndices) {
1587     unsigned Reg = getPHISourceReg(PHI, PII);
1588     if (LRegion->isLiveOut(Reg)) {
1589       bool IsDead = true;
1590 
1591       // Check if register is live out of the basic block
1592       MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent();
1593       for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) {
1594         if ((*UI).getParent()->getParent() != DefMBB) {
1595           IsDead = false;
1596         }
1597       }
1598 
1599       DEBUG(dbgs() << "Register " << PrintReg(Reg, TRI) << " is "
1600                    << (IsDead ? "dead" : "alive") << " after PHI replace\n");
1601       if (IsDead) {
1602         LRegion->removeLiveOut(Reg);
1603       }
1604       WasLiveOut = true;
1605     }
1606   }
1607 
1608   if (WasLiveOut)
1609     LRegion->addLiveOut(CombinedSourceReg);
1610 }
1611 
1612 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region,
1613                                                   MachineBasicBlock *LastMerge,
1614                                                   MachineInstr &PHI,
1615                                                   LinearizedRegion *LRegion) {
1616   SmallVector<unsigned, 2> PHIRegionIndices;
1617   getPHIRegionIndices(Region, PHI, PHIRegionIndices);
1618   unsigned LinearizedSourceReg =
1619       storePHILinearizationInfo(PHI, &PHIRegionIndices);
1620 
1621   replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices);
1622   replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion);
1623 }
1624 
1625 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region,
1626                                                    MachineBasicBlock *IfMBB,
1627                                                    MachineInstr &PHI) {
1628   SmallVector<unsigned, 2> PHINonRegionIndices;
1629   getPHINonRegionIndices(Region, PHI, PHINonRegionIndices);
1630   unsigned LinearizedSourceReg =
1631       storePHILinearizationInfo(PHI, &PHINonRegionIndices);
1632   replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices);
1633 }
1634 
1635 static void collectPHIs(MachineBasicBlock *MBB,
1636                         SmallVector<MachineInstr *, 2> &PHIs) {
1637   for (auto &BBI : *MBB) {
1638     if (BBI.isPHI()) {
1639       PHIs.push_back(&BBI);
1640     }
1641   }
1642 }
1643 
1644 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region,
1645                                                    MachineBasicBlock *LastMerge,
1646                                                    LinearizedRegion *LRegion) {
1647   SmallVector<MachineInstr *, 2> PHIs;
1648   auto Exit = Region->getSucc();
1649   if (Exit == nullptr)
1650     return;
1651 
1652   collectPHIs(Exit, PHIs);
1653 
1654   for (auto PHII : PHIs) {
1655     rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion);
1656   }
1657 }
1658 
1659 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region,
1660                                                     MachineBasicBlock *IfMBB) {
1661   SmallVector<MachineInstr *, 2> PHIs;
1662   auto Entry = Region->getEntry();
1663 
1664   collectPHIs(Entry, PHIs);
1665 
1666   for (auto PHII : PHIs) {
1667     rewriteRegionEntryPHI(Region, IfMBB, *PHII);
1668   }
1669 }
1670 
1671 void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB,
1672                                                        MachineBasicBlock *Dest,
1673                                                        const DebugLoc &DL) {
1674   DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber()
1675                << " -> " << Dest->getNumber() << "\n");
1676   MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator();
1677   bool HasTerminator = Terminator != MBB->instr_end();
1678   if (HasTerminator) {
1679     TII->ReplaceTailWithBranchTo(Terminator, Dest);
1680   }
1681   if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) {
1682     TII->insertUnconditionalBranch(*MBB, Dest, DL);
1683   }
1684 }
1685 
1686 static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) {
1687   MachineBasicBlock *result = nullptr;
1688   for (auto &MFI : MF) {
1689     if (MFI.succ_size() == 0) {
1690       if (result == nullptr) {
1691         result = &MFI;
1692       } else {
1693         return nullptr;
1694       }
1695     }
1696   }
1697 
1698   return result;
1699 }
1700 
1701 static bool hasOneExitNode(MachineFunction &MF) {
1702   return getSingleExitNode(MF) != nullptr;
1703 }
1704 
1705 MachineBasicBlock *
1706 AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) {
1707   auto Exit = Region->getSucc();
1708 
1709   // If the exit is the end of the function, we just use the existing
1710   MachineFunction *MF = Region->getEntry()->getParent();
1711   if (Exit == nullptr && hasOneExitNode(*MF)) {
1712     return &(*(--(Region->getEntry()->getParent()->end())));
1713   }
1714 
1715   MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock();
1716   if (Exit == nullptr) {
1717     MachineFunction::iterator ExitIter = MF->end();
1718     MF->insert(ExitIter, LastMerge);
1719   } else {
1720     MachineFunction::iterator ExitIter = Exit->getIterator();
1721     MF->insert(ExitIter, LastMerge);
1722     LastMerge->addSuccessor(Exit);
1723     insertUnconditionalBranch(LastMerge, Exit);
1724     DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n");
1725   }
1726   return LastMerge;
1727 }
1728 
1729 void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB,
1730                                             MachineBasicBlock *CodeBB,
1731                                             MachineBasicBlock *MergeBB,
1732                                             unsigned DestRegister,
1733                                             unsigned IfSourceRegister,
1734                                             unsigned CodeSourceRegister,
1735                                             bool IsUndefIfSource) {
1736   // If this is the function exit block, we don't need a phi.
1737   if (MergeBB->succ_begin() == MergeBB->succ_end()) {
1738     return;
1739   }
1740   DEBUG(dbgs() << "Merge PHI (BB#" << MergeBB->getNumber()
1741                << "): " << PrintReg(DestRegister, TRI) << "<def> = PHI("
1742                << PrintReg(IfSourceRegister, TRI) << ", BB#"
1743                << IfBB->getNumber() << PrintReg(CodeSourceRegister, TRI)
1744                << ", BB#" << CodeBB->getNumber() << ")\n");
1745   const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin());
1746   MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL,
1747                                     TII->get(TargetOpcode::PHI), DestRegister);
1748   if (IsUndefIfSource && false) {
1749     MIB.addReg(IfSourceRegister, RegState::Undef);
1750   } else {
1751     MIB.addReg(IfSourceRegister);
1752   }
1753   MIB.addMBB(IfBB);
1754   MIB.addReg(CodeSourceRegister);
1755   MIB.addMBB(CodeBB);
1756 }
1757 
1758 static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) {
1759   for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
1760                                         E = MBB->succ_end();
1761        PI != E; ++PI) {
1762     if ((*PI) != MBB) {
1763       (MBB)->removeSuccessor(*PI);
1764     }
1765   }
1766 }
1767 
1768 static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
1769                                    MachineBasicBlock *EndMBB) {
1770 
1771   // We have to check against the StartMBB successor becasuse a
1772   // structurized region with a loop will have the entry block split,
1773   // and the backedge will go to the entry successor.
1774   DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs;
1775   unsigned SuccSize = StartMBB->succ_size();
1776   if (SuccSize > 0) {
1777     MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin());
1778     for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(),
1779                                           E = EndMBB->succ_end();
1780          PI != E; ++PI) {
1781       // Either we have a back-edge to the entry block, or a back-edge to the
1782       // succesor of the entry block since the block may be split.
1783       if ((*PI) != StartMBB &&
1784           !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
1785         Succs.insert(
1786             std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI));
1787       }
1788     }
1789   }
1790 
1791   for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(),
1792                                         E = StartMBB->pred_end();
1793        PI != E; ++PI) {
1794     if ((*PI) != EndMBB) {
1795       Succs.insert(
1796           std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB));
1797     }
1798   }
1799 
1800   for (auto SI : Succs) {
1801     std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI;
1802     DEBUG(dbgs() << "Removing edge: BB#" << Edge.first->getNumber() << " -> BB#"
1803                  << Edge.second->getNumber() << "\n");
1804     Edge.first->removeSuccessor(Edge.second);
1805   }
1806 }
1807 
1808 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
1809     MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart,
1810     MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg,
1811     bool InheritPreds) {
1812   MachineFunction *MF = MergeBB->getParent();
1813   MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock();
1814 
1815   if (InheritPreds) {
1816     for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(),
1817                                           E = CodeBBStart->pred_end();
1818          PI != E; ++PI) {
1819       if ((*PI) != CodeBBEnd) {
1820         MachineBasicBlock *Pred = (*PI);
1821         Pred->addSuccessor(IfBB);
1822       }
1823     }
1824   }
1825 
1826   removeExternalCFGEdges(CodeBBStart, CodeBBEnd);
1827 
1828   auto CodeBBStartI = CodeBBStart->getIterator();
1829   auto CodeBBEndI = CodeBBEnd->getIterator();
1830   auto MergeIter = MergeBB->getIterator();
1831   MF->insert(MergeIter, IfBB);
1832   MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI);
1833   IfBB->addSuccessor(MergeBB);
1834   IfBB->addSuccessor(CodeBBStart);
1835 
1836   DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n");
1837   // Ensure that the MergeBB is a succesor of the CodeEndBB.
1838   if (!CodeBBEnd->isSuccessor(MergeBB))
1839     CodeBBEnd->addSuccessor(MergeBB);
1840 
1841   DEBUG(dbgs() << "Moved MBB#" << CodeBBStart->getNumber() << " through MBB#"
1842                << CodeBBEnd->getNumber() << "\n");
1843 
1844   // If we have a single predecessor we can find a reasonable debug location
1845   MachineBasicBlock *SinglePred =
1846       CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr;
1847   const DebugLoc &DL = SinglePred
1848                     ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator())
1849                     : DebugLoc();
1850 
1851   unsigned Reg =
1852       TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg,
1853                     SelectBB->getNumber() /* CodeBBStart->getNumber() */);
1854   if (&(*(IfBB->getParent()->begin())) == IfBB) {
1855     TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg,
1856                               CodeBBStart->getNumber());
1857   }
1858   MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
1859   ArrayRef<MachineOperand> Cond(RegOp);
1860   TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL);
1861 
1862   return IfBB;
1863 }
1864 
1865 void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled(
1866     SmallVector<MachineOperand, 1> Cond) {
1867   if (Cond.size() != 1)
1868     return;
1869   if (!Cond[0].isReg())
1870     return;
1871 
1872   unsigned CondReg = Cond[0].getReg();
1873   for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) {
1874     (*UI).setIsKill(false);
1875   }
1876 }
1877 
1878 void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
1879                                                      MachineBasicBlock *MergeBB,
1880                                                      unsigned BBSelectReg) {
1881   MachineBasicBlock *TrueBB = nullptr;
1882   MachineBasicBlock *FalseBB = nullptr;
1883   SmallVector<MachineOperand, 1> Cond;
1884   MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB];
1885   TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond);
1886 
1887   const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator());
1888 
1889   if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) {
1890     // This is an exit block, hence no successors. We will assign the
1891     // bb select register to the entry block.
1892     TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1893                               BBSelectReg,
1894                               CodeBB->getParent()->begin()->getNumber());
1895     insertUnconditionalBranch(CodeBB, MergeBB, DL);
1896     return;
1897   }
1898 
1899   if (FalseBB == nullptr && TrueBB == nullptr) {
1900     TrueBB = FallthroughBB;
1901   } else if (TrueBB != nullptr) {
1902     FalseBB =
1903         (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB;
1904   }
1905 
1906   if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) {
1907     TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1908                               BBSelectReg, TrueBB->getNumber());
1909   } else {
1910     const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg);
1911     unsigned TrueBBReg = MRI->createVirtualRegister(RegClass);
1912     unsigned FalseBBReg = MRI->createVirtualRegister(RegClass);
1913     TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1914                               TrueBBReg, TrueBB->getNumber());
1915     TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
1916                               FalseBBReg, FalseBB->getNumber());
1917     ensureCondIsNotKilled(Cond);
1918     TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL,
1919                             BBSelectReg, Cond, TrueBBReg, FalseBBReg);
1920   }
1921 
1922   insertUnconditionalBranch(CodeBB, MergeBB, DL);
1923 }
1924 
1925 MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) {
1926   if (MRI->def_begin(Reg) == MRI->def_end()) {
1927     DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo())
1928                  << " has NO defs\n");
1929   } else if (!MRI->hasOneDef(Reg)) {
1930     DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo())
1931                  << " has multiple defs\n");
1932     DEBUG(dbgs() << "DEFS BEGIN:\n");
1933     for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) {
1934       DEBUG(DI->getParent()->dump());
1935     }
1936     DEBUG(dbgs() << "DEFS END\n");
1937   }
1938 
1939   assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
1940   return (*(MRI->def_begin(Reg))).getParent();
1941 }
1942 
1943 void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB,
1944                                               MachineBasicBlock *CodeBB,
1945                                               MachineBasicBlock *MergeBB,
1946                                               LinearizedRegion *InnerRegion,
1947                                               unsigned DestReg,
1948                                               unsigned SourceReg) {
1949   // In this function we know we are part of a chain already, so we need
1950   // to add the registers to the existing chain, and rename the register
1951   // inside the region.
1952   bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
1953   MachineInstr *DefInstr = getDefInstr(SourceReg);
1954   if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) {
1955     // Handle the case where the def is a PHI-def inside a basic
1956     // block, then we only need to do renaming. Special care needs to
1957     // be taken if the PHI-def is part of an existing chain, or if a
1958     // new one needs to be created.
1959     InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI);
1960 
1961     // We collect all PHI Information, and if we are at the region entry,
1962     // all PHIs will be removed, and then re-introduced if needed.
1963     storePHILinearizationInfoDest(DestReg, *DefInstr);
1964     // We have picked up all the information we need now and can remove
1965     // the PHI
1966     PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
1967     DefInstr->eraseFromParent();
1968   } else {
1969     // If this is not a phi-def, or it is a phi-def but from a linearized region
1970     if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) {
1971       // If this is a single BB and the definition is in this block we
1972       // need to replace any uses outside the region.
1973       InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI);
1974     }
1975     const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg);
1976     unsigned NextDestReg = MRI->createVirtualRegister(RegClass);
1977     bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1;
1978     DEBUG(dbgs() << "Insert Chained PHI\n");
1979     insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg,
1980                    SourceReg, IsLastDef);
1981 
1982     PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
1983     if (IsLastDef) {
1984       const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator());
1985       TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL,
1986                                 NextDestReg, 0);
1987       PHIInfo.deleteDef(DestReg);
1988     } else {
1989       PHIInfo.replaceDef(DestReg, NextDestReg);
1990     }
1991   }
1992 }
1993 
1994 bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB,
1995                                          LinearizedRegion *InnerRegion,
1996                                          unsigned Register) {
1997   return getDefInstr(Register)->getParent() == MBB ||
1998          InnerRegion->contains(getDefInstr(Register)->getParent());
1999 }
2000 
2001 void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
2002                                                 MachineBasicBlock *CodeBB,
2003                                                 MachineBasicBlock *MergeBB,
2004                                                 LinearizedRegion *InnerRegion,
2005                                                 LinearizedRegion *LRegion) {
2006   DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts();
2007   SmallVector<unsigned, 4> OldLiveOuts;
2008   bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
2009   for (auto OLI : *LiveOuts) {
2010     OldLiveOuts.push_back(OLI);
2011   }
2012 
2013   for (auto LI : OldLiveOuts) {
2014     DEBUG(dbgs() << "LiveOut: " << PrintReg(LI, TRI));
2015     if (!containsDef(CodeBB, InnerRegion, LI) ||
2016         (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
2017       // If the register simly lives through the CodeBB, we don't have
2018       // to rewrite anything since the register is not defined in this
2019       // part of the code.
2020       DEBUG(dbgs() << "- through");
2021       continue;
2022     }
2023     DEBUG(dbgs() << "\n");
2024     unsigned Reg = LI;
2025     if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
2026       // If the register is live out, we do want to create a phi,
2027       // unless it is from the Exit block, becasuse in that case there
2028       // is already a PHI, and no need to create a new one.
2029 
2030       // If the register is just a live out def and not part of a phi
2031       // chain, we need to create a PHI node to handle the if region,
2032       // and replace all uses outside of the region with the new dest
2033       // register, unless it is the outgoing BB select register. We have
2034       // already creaed phi nodes for these.
2035       const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
2036       unsigned PHIDestReg = MRI->createVirtualRegister(RegClass);
2037       unsigned IfSourceReg = MRI->createVirtualRegister(RegClass);
2038       // Create initializer, this value is never used, but is needed
2039       // to satisfy SSA.
2040       DEBUG(dbgs() << "Initializer for reg: " << PrintReg(Reg) << "\n");
2041       TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(),
2042                         IfSourceReg, 0);
2043 
2044       InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI);
2045       DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n");
2046       insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg,
2047                      IfSourceReg, Reg, true);
2048     }
2049   }
2050 
2051   // Handle the chained definitions in PHIInfo, checking if this basic block
2052   // is a source block for a definition.
2053   SmallVector<unsigned, 4> Sources;
2054   if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) {
2055     DEBUG(dbgs() << "Inserting PHI Live Out from BB#" << CodeBB->getNumber()
2056                  << "\n");
2057     for (auto SI : Sources) {
2058       unsigned DestReg;
2059       PHIInfo.findDest(SI, CodeBB, DestReg);
2060       insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI);
2061     }
2062     DEBUG(dbgs() << "Insertion done.\n");
2063   }
2064 
2065   DEBUG(PHIInfo.dump(MRI));
2066 }
2067 
2068 void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) {
2069   DEBUG(dbgs() << "Before PHI Prune\n");
2070   DEBUG(PHIInfo.dump(MRI));
2071   SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4>
2072       ElimiatedSources;
2073   for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2074        ++DRI) {
2075 
2076     unsigned DestReg = *DRI;
2077     auto SE = PHIInfo.sources_end(DestReg);
2078 
2079     bool MBBContainsPHISource = false;
2080     // Check if there is a PHI source in this MBB
2081     for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2082       unsigned SourceReg = (*SRI).first;
2083       MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
2084       if (Def->getParent()->getParent() == MBB) {
2085         MBBContainsPHISource = true;
2086       }
2087     }
2088 
2089     // If so, all other sources are useless since we know this block
2090     // is always executed when the region is executed.
2091     if (MBBContainsPHISource) {
2092       for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2093         PHILinearize::PHISourceT Source = *SRI;
2094         unsigned SourceReg = Source.first;
2095         MachineBasicBlock *SourceMBB = Source.second;
2096         MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
2097         if (Def->getParent()->getParent() != MBB) {
2098           ElimiatedSources.push_back(
2099               std::make_tuple(DestReg, SourceReg, SourceMBB));
2100         }
2101       }
2102     }
2103   }
2104 
2105   // Remove the PHI sources that are in the given MBB
2106   for (auto &SourceInfo : ElimiatedSources) {
2107     PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo),
2108                          std::get<2>(SourceInfo));
2109   }
2110   DEBUG(dbgs() << "After PHI Prune\n");
2111   DEBUG(PHIInfo.dump(MRI));
2112 }
2113 
2114 void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion,
2115                                             unsigned DestReg) {
2116   MachineBasicBlock *Entry = CurrentRegion->getEntry();
2117   MachineBasicBlock *Exit = CurrentRegion->getExit();
2118 
2119   DEBUG(dbgs() << "RegionExit: " << Exit->getNumber()
2120                << " Pred: " << (*(Entry->pred_begin()))->getNumber() << "\n");
2121 
2122   int NumSources = 0;
2123   auto SE = PHIInfo.sources_end(DestReg);
2124 
2125   for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2126     NumSources++;
2127   }
2128 
2129   if (NumSources == 1) {
2130     auto SRI = PHIInfo.sources_begin(DestReg);
2131     unsigned SourceReg = (*SRI).first;
2132     replaceRegisterWith(DestReg, SourceReg);
2133   } else {
2134     const DebugLoc &DL = Entry->findDebugLoc(Entry->begin());
2135     MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL,
2136                                       TII->get(TargetOpcode::PHI), DestReg);
2137     DEBUG(dbgs() << "Entry PHI " << PrintReg(DestReg, TRI) << "<def> = PHI(");
2138 
2139     unsigned CurrentBackedgeReg = 0;
2140 
2141     for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2142       unsigned SourceReg = (*SRI).first;
2143 
2144       if (CurrentRegion->contains((*SRI).second)) {
2145         if (CurrentBackedgeReg == 0) {
2146           CurrentBackedgeReg = SourceReg;
2147         } else {
2148           MachineInstr *PHIDefInstr = getDefInstr(SourceReg);
2149           MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent();
2150           const TargetRegisterClass *RegClass =
2151               MRI->getRegClass(CurrentBackedgeReg);
2152           unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass);
2153           MachineInstrBuilder BackedgePHI =
2154               BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL,
2155                       TII->get(TargetOpcode::PHI), NewBackedgeReg);
2156           BackedgePHI.addReg(CurrentBackedgeReg);
2157           BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0));
2158           BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1));
2159           BackedgePHI.addMBB((*SRI).second);
2160           CurrentBackedgeReg = NewBackedgeReg;
2161           DEBUG(dbgs() << "Inserting backedge PHI: "
2162                        << PrintReg(NewBackedgeReg, TRI) << "<def> = PHI("
2163                        << PrintReg(CurrentBackedgeReg, TRI) << ", BB#"
2164                        << getPHIPred(*PHIDefInstr, 0)->getNumber() << ", "
2165                        << PrintReg(getPHISourceReg(*PHIDefInstr, 1), TRI)
2166                        << ", BB#" << (*SRI).second->getNumber());
2167         }
2168       } else {
2169         MIB.addReg(SourceReg);
2170         MIB.addMBB((*SRI).second);
2171         DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#"
2172                      << (*SRI).second->getNumber() << ", ");
2173       }
2174     }
2175 
2176     // Add the final backedge register source to the entry phi
2177     if (CurrentBackedgeReg != 0) {
2178       MIB.addReg(CurrentBackedgeReg);
2179       MIB.addMBB(Exit);
2180       DEBUG(dbgs() << PrintReg(CurrentBackedgeReg, TRI) << ", BB#"
2181                    << Exit->getNumber() << ")\n");
2182     } else {
2183       DEBUG(dbgs() << ")\n");
2184     }
2185   }
2186 }
2187 
2188 void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) {
2189   DEBUG(PHIInfo.dump(MRI));
2190 
2191   for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2192        ++DRI) {
2193 
2194     unsigned DestReg = *DRI;
2195     createEntryPHI(CurrentRegion, DestReg);
2196   }
2197   PHIInfo.clear();
2198 }
2199 
2200 void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register,
2201                                                  unsigned NewRegister) {
2202   assert(Register != NewRegister && "Cannot replace a reg with itself");
2203 
2204   for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
2205                                          E = MRI->reg_end();
2206        I != E;) {
2207     MachineOperand &O = *I;
2208     ++I;
2209     if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
2210       DEBUG(dbgs() << "Trying to substitute physical register: "
2211                    << PrintReg(NewRegister, MRI->getTargetRegisterInfo())
2212                    << "\n");
2213       llvm_unreachable("Cannot substitute physical registers");
2214       // We don't handle physical registers, but if we need to
2215       // in the future This is how we do it:
2216       // O.substPhysReg(NewRegister, *TRI);
2217     } else {
2218       DEBUG(dbgs() << "Replacing register: "
2219                    << PrintReg(Register, MRI->getTargetRegisterInfo())
2220                    << " with "
2221                    << PrintReg(NewRegister, MRI->getTargetRegisterInfo())
2222                    << "\n");
2223       O.setReg(NewRegister);
2224     }
2225   }
2226   PHIInfo.deleteDef(Register);
2227 
2228   getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
2229 
2230   DEBUG(PHIInfo.dump(MRI));
2231 }
2232 
2233 void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) {
2234   DEBUG(dbgs() << "Resolve PHI Infos\n");
2235   DEBUG(PHIInfo.dump(MRI));
2236   for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2237        ++DRI) {
2238     unsigned DestReg = *DRI;
2239     DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) << "\n");
2240     auto SRI = PHIInfo.sources_begin(DestReg);
2241     unsigned SourceReg = (*SRI).first;
2242     DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI)
2243                  << " SourceReg: " << PrintReg(SourceReg, TRI) << "\n");
2244 
2245     assert(PHIInfo.sources_end(DestReg) == ++SRI &&
2246            "More than one phi source in entry node");
2247     replaceRegisterWith(DestReg, SourceReg);
2248   }
2249 }
2250 
2251 static bool isFunctionEntryBlock(MachineBasicBlock *MBB) {
2252   return ((&(*(MBB->getParent()->begin()))) == MBB);
2253 }
2254 
2255 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
2256     MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB,
2257     LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn,
2258     unsigned BBSelectRegOut) {
2259   if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) {
2260     // Handle non-loop function entry block.
2261     // We need to allow loops to the entry block and then
2262     rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
2263     resolvePHIInfos(CodeBB);
2264     removeExternalCFGSuccessors(CodeBB);
2265     CodeBB->addSuccessor(MergeBB);
2266     CurrentRegion->addMBB(CodeBB);
2267     return nullptr;
2268   }
2269   if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) {
2270     // Handle non-loop region entry block.
2271     MachineFunction *MF = MergeBB->getParent();
2272     auto MergeIter = MergeBB->getIterator();
2273     auto CodeBBStartIter = CodeBB->getIterator();
2274     auto CodeBBEndIter = ++(CodeBB->getIterator());
2275     if (CodeBBEndIter != MergeIter) {
2276       MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter);
2277     }
2278     rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
2279     prunePHIInfo(CodeBB);
2280     createEntryPHIs(CurrentRegion);
2281     removeExternalCFGSuccessors(CodeBB);
2282     CodeBB->addSuccessor(MergeBB);
2283     CurrentRegion->addMBB(CodeBB);
2284     return nullptr;
2285   } else {
2286     // Handle internal block.
2287     const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn);
2288     unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass);
2289     rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg);
2290     bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB;
2291     MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB,
2292                                             BBSelectRegIn, IsRegionEntryBB);
2293     CurrentRegion->addMBB(IfBB);
2294     // If this is the entry block we need to make the If block the new
2295     // linearized region entry.
2296     if (IsRegionEntryBB) {
2297       CurrentRegion->setEntry(IfBB);
2298 
2299       if (CurrentRegion->getHasLoop()) {
2300         MachineBasicBlock *RegionExit = CurrentRegion->getExit();
2301         MachineBasicBlock *ETrueBB = nullptr;
2302         MachineBasicBlock *EFalseBB = nullptr;
2303         SmallVector<MachineOperand, 1> ECond;
2304 
2305         const DebugLoc &DL = DebugLoc();
2306         TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
2307         TII->removeBranch(*RegionExit);
2308 
2309         // We need to create a backedge if there is a loop
2310         unsigned Reg = TII->insertNE(
2311             RegionExit, RegionExit->instr_end(), DL,
2312             CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
2313             CurrentRegion->getRegionMRT()->getEntry()->getNumber());
2314         MachineOperand RegOp =
2315             MachineOperand::CreateReg(Reg, false, false, true);
2316         ArrayRef<MachineOperand> Cond(RegOp);
2317         DEBUG(dbgs() << "RegionExitReg: ");
2318         DEBUG(Cond[0].print(dbgs(), TRI));
2319         DEBUG(dbgs() << "\n");
2320         TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
2321                           Cond, DebugLoc());
2322         RegionExit->addSuccessor(CurrentRegion->getEntry());
2323       }
2324     }
2325     CurrentRegion->addMBB(CodeBB);
2326     LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo);
2327 
2328     InnerRegion.setParent(CurrentRegion);
2329     DEBUG(dbgs() << "Insert BB Select PHI (BB)\n");
2330     insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
2331                    CodeBBSelectReg);
2332     InnerRegion.addMBB(MergeBB);
2333 
2334     DEBUG(InnerRegion.print(dbgs(), TRI));
2335     rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion);
2336     extractKilledPHIs(CodeBB);
2337     if (IsRegionEntryBB) {
2338       createEntryPHIs(CurrentRegion);
2339     }
2340     return IfBB;
2341   }
2342 }
2343 
2344 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
2345     MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion,
2346     LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
2347     unsigned BBSelectRegIn, unsigned BBSelectRegOut) {
2348   unsigned CodeBBSelectReg =
2349       InnerRegion->getRegionMRT()->getInnerOutputRegister();
2350   MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry();
2351   MachineBasicBlock *CodeExitBB = InnerRegion->getExit();
2352   MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB,
2353                                           SelectBB, BBSelectRegIn, true);
2354   CurrentRegion->addMBB(IfBB);
2355   bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry();
2356   if (isEntry) {
2357 
2358     if (CurrentRegion->getHasLoop()) {
2359       MachineBasicBlock *RegionExit = CurrentRegion->getExit();
2360       MachineBasicBlock *ETrueBB = nullptr;
2361       MachineBasicBlock *EFalseBB = nullptr;
2362       SmallVector<MachineOperand, 1> ECond;
2363 
2364       const DebugLoc &DL = DebugLoc();
2365       TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
2366       TII->removeBranch(*RegionExit);
2367 
2368       // We need to create a backedge if there is a loop
2369       unsigned Reg =
2370           TII->insertNE(RegionExit, RegionExit->instr_end(), DL,
2371                         CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
2372                         CurrentRegion->getRegionMRT()->getEntry()->getNumber());
2373       MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
2374       ArrayRef<MachineOperand> Cond(RegOp);
2375       DEBUG(dbgs() << "RegionExitReg: ");
2376       DEBUG(Cond[0].print(dbgs(), TRI));
2377       DEBUG(dbgs() << "\n");
2378       TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
2379                         Cond, DebugLoc());
2380       RegionExit->addSuccessor(IfBB);
2381     }
2382   }
2383   CurrentRegion->addMBBs(InnerRegion);
2384   DEBUG(dbgs() << "Insert BB Select PHI (region)\n");
2385   insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
2386                  CodeBBSelectReg);
2387 
2388   rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion,
2389                      CurrentRegion);
2390 
2391   rewriteRegionEntryPHIs(InnerRegion, IfBB);
2392 
2393   if (isEntry) {
2394     CurrentRegion->setEntry(IfBB);
2395   }
2396 
2397   if (isEntry) {
2398     createEntryPHIs(CurrentRegion);
2399   }
2400 
2401   return IfBB;
2402 }
2403 
2404 void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI,
2405                                           MachineBasicBlock *Entry,
2406                                           MachineBasicBlock *EntrySucc,
2407                                           LinearizedRegion *LRegion) {
2408   SmallVector<unsigned, 2> PHIRegionIndices;
2409   getPHIRegionIndices(LRegion, PHI, PHIRegionIndices);
2410 
2411   assert(PHIRegionIndices.size() == 1);
2412 
2413   unsigned RegionIndex = PHIRegionIndices[0];
2414   unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex);
2415   MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex);
2416   unsigned PHIDest = getPHIDestReg(PHI);
2417   unsigned PHISource = PHIDest;
2418   unsigned ReplaceReg;
2419 
2420   if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) {
2421     PHISource = ReplaceReg;
2422   }
2423 
2424   const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest);
2425   unsigned NewDestReg = MRI->createVirtualRegister(RegClass);
2426   LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI);
2427   MachineInstrBuilder MIB =
2428       BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(),
2429               TII->get(TargetOpcode::PHI), NewDestReg);
2430   DEBUG(dbgs() << "Split Entry PHI " << PrintReg(NewDestReg, TRI)
2431                << "<def> = PHI(");
2432   MIB.addReg(PHISource);
2433   MIB.addMBB(Entry);
2434   DEBUG(dbgs() << PrintReg(PHISource, TRI) << ", BB#" << Entry->getNumber());
2435   MIB.addReg(RegionSourceReg);
2436   MIB.addMBB(RegionSourceMBB);
2437   DEBUG(dbgs() << " ," << PrintReg(RegionSourceReg, TRI) << ", BB#"
2438                << RegionSourceMBB->getNumber() << ")\n");
2439 }
2440 
2441 void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry,
2442                                            MachineBasicBlock *EntrySucc,
2443                                            LinearizedRegion *LRegion) {
2444   SmallVector<MachineInstr *, 2> PHIs;
2445   collectPHIs(Entry, PHIs);
2446 
2447   for (auto PHII : PHIs) {
2448     splitLoopPHI(*PHII, Entry, EntrySucc, LRegion);
2449   }
2450 }
2451 
2452 // Split the exit block so that we can insert a end control flow
2453 MachineBasicBlock *
2454 AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) {
2455   auto MRTRegion = LRegion->getRegionMRT();
2456   auto Exit = LRegion->getExit();
2457   auto MF = Exit->getParent();
2458   auto Succ = MRTRegion->getSucc();
2459 
2460   auto NewExit = MF->CreateMachineBasicBlock();
2461   auto AfterExitIter = Exit->getIterator();
2462   AfterExitIter++;
2463   MF->insert(AfterExitIter, NewExit);
2464   Exit->removeSuccessor(Succ);
2465   Exit->addSuccessor(NewExit);
2466   NewExit->addSuccessor(Succ);
2467   insertUnconditionalBranch(NewExit, Succ);
2468   LRegion->addMBB(NewExit);
2469   LRegion->setExit(NewExit);
2470 
2471   DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() << "\n");
2472 
2473   // Replace any PHI Predecessors in the successor with NewExit
2474   for (auto &II : *Succ) {
2475     MachineInstr &Instr = II;
2476 
2477     // If we are past the PHI instructions we are done
2478     if (!Instr.isPHI())
2479       break;
2480 
2481     int numPreds = getPHINumInputs(Instr);
2482     for (int i = 0; i < numPreds; ++i) {
2483       auto Pred = getPHIPred(Instr, i);
2484       if (Pred == Exit) {
2485         setPhiPred(Instr, i, NewExit);
2486       }
2487     }
2488   }
2489 
2490   return NewExit;
2491 }
2492 
2493 
2494 static MachineBasicBlock *split(MachineBasicBlock::iterator I) {
2495   // Create the fall-through block.
2496   MachineBasicBlock *MBB = (*I).getParent();
2497   MachineFunction *MF = MBB->getParent();
2498   MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock();
2499   auto MBBIter = ++(MBB->getIterator());
2500   MF->insert(MBBIter, SuccMBB);
2501   SuccMBB->transferSuccessorsAndUpdatePHIs(MBB);
2502   MBB->addSuccessor(SuccMBB);
2503 
2504   // Splice the code over.
2505   SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end());
2506 
2507   return SuccMBB;
2508 }
2509 
2510 // Split the entry block separating PHI-nodes and the rest of the code
2511 // This is needed to insert an initializer for the bb select register
2512 // inloop regions.
2513 
2514 MachineBasicBlock *
2515 AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) {
2516   MachineBasicBlock *Entry = LRegion->getEntry();
2517   MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI());
2518   MachineBasicBlock *Exit = LRegion->getExit();
2519 
2520   DEBUG(dbgs() << "Split BB#" << Entry->getNumber() << " to BB#"
2521                << Entry->getNumber() << " -> BB#" << EntrySucc->getNumber()
2522                << "\n");
2523   LRegion->addMBB(EntrySucc);
2524 
2525   // Make the backedge go to Entry Succ
2526   if (Exit->isSuccessor(Entry)) {
2527     Exit->removeSuccessor(Entry);
2528   }
2529   Exit->addSuccessor(EntrySucc);
2530   MachineInstr &Branch = *(Exit->instr_rbegin());
2531   for (auto &UI : Branch.uses()) {
2532     if (UI.isMBB() && UI.getMBB() == Entry) {
2533       UI.setMBB(EntrySucc);
2534     }
2535   }
2536 
2537   splitLoopPHIs(Entry, EntrySucc, LRegion);
2538 
2539   return EntrySucc;
2540 }
2541 
2542 LinearizedRegion *
2543 AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) {
2544   LinearizedRegion *LRegion = Region->getLinearizedRegion();
2545   LRegion->initLiveOut(Region, MRI, TRI, PHIInfo);
2546   LRegion->setEntry(Region->getEntry());
2547   return LRegion;
2548 }
2549 
2550 static void removeOldExitPreds(RegionMRT *Region) {
2551   MachineBasicBlock *Exit = Region->getSucc();
2552   if (Exit == nullptr) {
2553     return;
2554   }
2555   for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(),
2556                                         E = Exit->pred_end();
2557        PI != E; ++PI) {
2558     if (Region->contains(*PI)) {
2559       (*PI)->removeSuccessor(Exit);
2560     }
2561   }
2562 }
2563 
2564 static bool mbbHasBackEdge(MachineBasicBlock *MBB,
2565                            SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
2566   for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
2567     if (MBBs.count(*SI) != 0) {
2568       return true;
2569     }
2570   }
2571   return false;
2572 }
2573 
2574 static bool containsNewBackedge(MRT *Tree,
2575                                 SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
2576   // Need to traverse this in reverse since it is in post order.
2577   if (Tree == nullptr)
2578     return false;
2579 
2580   if (Tree->isMBB()) {
2581     MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB();
2582     MBBs.insert(MBB);
2583     if (mbbHasBackEdge(MBB, MBBs)) {
2584       return true;
2585     }
2586   } else {
2587     RegionMRT *Region = Tree->getRegionMRT();
2588     SetVector<MRT *> *Children = Region->getChildren();
2589     for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) {
2590       if (containsNewBackedge(*CI, MBBs))
2591         return true;
2592     }
2593   }
2594   return false;
2595 }
2596 
2597 static bool containsNewBackedge(RegionMRT *Region) {
2598   SmallPtrSet<MachineBasicBlock *, 8> MBBs;
2599   return containsNewBackedge(Region, MBBs);
2600 }
2601 
2602 bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
2603   auto *LRegion = initLinearizedRegion(Region);
2604   LRegion->setHasLoop(containsNewBackedge(Region));
2605   MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region);
2606   MachineBasicBlock *CurrentMerge = LastMerge;
2607   LRegion->addMBB(LastMerge);
2608   LRegion->setExit(LastMerge);
2609 
2610   rewriteRegionExitPHIs(Region, LastMerge, LRegion);
2611   removeOldExitPreds(Region);
2612 
2613   DEBUG(PHIInfo.dump(MRI));
2614 
2615   SetVector<MRT *> *Children = Region->getChildren();
2616   DEBUG(dbgs() << "===========If Region Start===============\n");
2617   if (LRegion->getHasLoop()) {
2618     DEBUG(dbgs() << "Has Backedge: Yes\n");
2619   } else {
2620     DEBUG(dbgs() << "Has Backedge: No\n");
2621   }
2622 
2623   unsigned BBSelectRegIn;
2624   unsigned BBSelectRegOut;
2625   for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) {
2626     DEBUG(dbgs() << "CurrentRegion: \n");
2627     DEBUG(LRegion->print(dbgs(), TRI));
2628 
2629     auto CNI = CI;
2630     ++CNI;
2631 
2632     MRT *Child = (*CI);
2633 
2634     if (Child->isRegion()) {
2635 
2636       LinearizedRegion *InnerLRegion =
2637           Child->getRegionMRT()->getLinearizedRegion();
2638       // We found the block is the exit of an inner region, we need
2639       // to put it in the current linearized region.
2640 
2641       DEBUG(dbgs() << "Linearizing region: ");
2642       DEBUG(InnerLRegion->print(dbgs(), TRI));
2643       DEBUG(dbgs() << "\n");
2644 
2645       MachineBasicBlock *InnerEntry = InnerLRegion->getEntry();
2646       if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) {
2647         // Entry has already been linearized, no need to do this region.
2648         unsigned OuterSelect = InnerLRegion->getBBSelectRegOut();
2649         unsigned InnerSelectReg =
2650             InnerLRegion->getRegionMRT()->getInnerOutputRegister();
2651         replaceRegisterWith(InnerSelectReg, OuterSelect),
2652             resolvePHIInfos(InnerEntry);
2653         if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge))
2654           InnerLRegion->getExit()->addSuccessor(CurrentMerge);
2655         continue;
2656       }
2657 
2658       BBSelectRegOut = Child->getBBSelectRegOut();
2659       BBSelectRegIn = Child->getBBSelectRegIn();
2660 
2661       DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI)
2662                    << "\n");
2663       DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI)
2664                    << "\n");
2665 
2666       MachineBasicBlock *IfEnd = CurrentMerge;
2667       CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion,
2668                                     Child->getRegionMRT()->getEntry(),
2669                                     BBSelectRegIn, BBSelectRegOut);
2670       TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
2671     } else {
2672       MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB();
2673       DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n");
2674 
2675       if (MBB == getSingleExitNode(*(MBB->getParent()))) {
2676         // If this is the exit block then we need to skip to the next.
2677         // The "in" register will be transferred to "out" in the next
2678         // iteration.
2679         continue;
2680       }
2681 
2682       BBSelectRegOut = Child->getBBSelectRegOut();
2683       BBSelectRegIn = Child->getBBSelectRegIn();
2684 
2685       DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI)
2686                    << "\n");
2687       DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI)
2688                    << "\n");
2689 
2690       MachineBasicBlock *IfEnd = CurrentMerge;
2691       // This is a basic block that is not part of an inner region, we
2692       // need to put it in the current linearized region.
2693       CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn,
2694                                     BBSelectRegOut);
2695       if (CurrentMerge) {
2696         TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
2697       }
2698 
2699       DEBUG(PHIInfo.dump(MRI));
2700     }
2701   }
2702 
2703   LRegion->removeFalseRegisterKills(MRI);
2704 
2705   if (LRegion->getHasLoop()) {
2706     MachineBasicBlock *NewSucc = splitEntry(LRegion);
2707     if (isFunctionEntryBlock(LRegion->getEntry())) {
2708       resolvePHIInfos(LRegion->getEntry());
2709     }
2710     const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI());
2711     unsigned InReg = LRegion->getBBSelectRegIn();
2712     unsigned InnerSelectReg =
2713         MRI->createVirtualRegister(MRI->getRegClass(InReg));
2714     unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg));
2715     TII->materializeImmediate(*(LRegion->getEntry()),
2716                               LRegion->getEntry()->getFirstTerminator(), DL,
2717                               NewInReg, Region->getEntry()->getNumber());
2718     // Need to be careful about updating the registers inside the region.
2719     LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI);
2720     DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n");
2721     insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc,
2722                    InnerSelectReg, NewInReg,
2723                    LRegion->getRegionMRT()->getInnerOutputRegister());
2724     splitExit(LRegion);
2725     TII->convertNonUniformLoopRegion(NewSucc, LastMerge);
2726   }
2727 
2728   if (Region->isRoot()) {
2729     TII->insertReturn(*LastMerge);
2730   }
2731 
2732   DEBUG(Region->getEntry()->getParent()->dump());
2733   DEBUG(LRegion->print(dbgs(), TRI));
2734   DEBUG(PHIInfo.dump(MRI));
2735 
2736   DEBUG(dbgs() << "===========If Region End===============\n");
2737 
2738   Region->setLinearizedRegion(LRegion);
2739   return true;
2740 }
2741 
2742 bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) {
2743   if (false && regionIsSimpleIf(Region)) {
2744     transformSimpleIfRegion(Region);
2745     return true;
2746   } else if (regionIsSequence(Region)) {
2747     fixupRegionExits(Region);
2748     return false;
2749   } else {
2750     structurizeComplexRegion(Region);
2751   }
2752   return false;
2753 }
2754 
2755 static int structurize_once = 0;
2756 
2757 bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region,
2758                                                 bool isTopRegion) {
2759   bool Changed = false;
2760 
2761   auto Children = Region->getChildren();
2762   for (auto CI : *Children) {
2763     if (CI->isRegion()) {
2764       Changed |= structurizeRegions(CI->getRegionMRT(), false);
2765     }
2766   }
2767 
2768   if (structurize_once < 2 || true) {
2769     Changed |= structurizeRegion(Region);
2770     structurize_once++;
2771   }
2772   return Changed;
2773 }
2774 
2775 void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) {
2776   DEBUG(dbgs() << "Fallthrough Map:\n");
2777   for (auto &MBBI : MF) {
2778     MachineBasicBlock *MBB = MBBI.getFallThrough();
2779     if (MBB != nullptr) {
2780       DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> "
2781                    << MBB->getNumber() << "\n");
2782     }
2783     FallthroughMap[&MBBI] = MBB;
2784   }
2785 }
2786 
2787 void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region,
2788                                                     unsigned SelectOut) {
2789   LinearizedRegion *LRegion = new LinearizedRegion();
2790   if (SelectOut) {
2791     LRegion->addLiveOut(SelectOut);
2792     DEBUG(dbgs() << "Add LiveOut (BBSelect): " << PrintReg(SelectOut, TRI)
2793                  << "\n");
2794   }
2795   LRegion->setRegionMRT(Region);
2796   Region->setLinearizedRegion(LRegion);
2797   LRegion->setParent(Region->getParent()
2798                          ? Region->getParent()->getLinearizedRegion()
2799                          : nullptr);
2800 }
2801 
2802 unsigned
2803 AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut,
2804                                                   MachineRegisterInfo *MRI,
2805                                                   const SIInstrInfo *TII) {
2806   if (MRT->isRegion()) {
2807     RegionMRT *Region = MRT->getRegionMRT();
2808     Region->setBBSelectRegOut(SelectOut);
2809     unsigned InnerSelectOut = createBBSelectReg(TII, MRI);
2810 
2811     // Fixme: Move linearization creation to the original spot
2812     createLinearizedRegion(Region, SelectOut);
2813 
2814     for (auto CI = Region->getChildren()->begin(),
2815               CE = Region->getChildren()->end();
2816          CI != CE; ++CI) {
2817       InnerSelectOut =
2818           initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII);
2819     }
2820     MRT->setBBSelectRegIn(InnerSelectOut);
2821     return InnerSelectOut;
2822   } else {
2823     MRT->setBBSelectRegOut(SelectOut);
2824     unsigned NewSelectIn = createBBSelectReg(TII, MRI);
2825     MRT->setBBSelectRegIn(NewSelectIn);
2826     return NewSelectIn;
2827   }
2828 }
2829 
2830 static void checkRegOnlyPHIInputs(MachineFunction &MF) {
2831   for (auto &MBBI : MF) {
2832     for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(),
2833                                            E = MBBI.instr_end();
2834          I != E; ++I) {
2835       MachineInstr &Instr = *I;
2836       if (Instr.isPHI()) {
2837         int numPreds = getPHINumInputs(Instr);
2838         for (int i = 0; i < numPreds; ++i) {
2839           assert(Instr.getOperand(i * 2 + 1).isReg() &&
2840                  "PHI Operand not a register");
2841         }
2842       }
2843     }
2844   }
2845 }
2846 
2847 
2848 INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
2849                       "AMDGPU Machine CFG Structurizer", false, false)
2850 INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass)
2851 INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
2852                     "AMDGPU Machine CFG Structurizer", false, false)
2853 
2854 char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID;
2855 
2856 
2857 bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
2858   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
2859   const SIInstrInfo *TII = ST.getInstrInfo();
2860   TRI = ST.getRegisterInfo();
2861   MRI = &(MF.getRegInfo());
2862   initFallthroughMap(MF);
2863 
2864   checkRegOnlyPHIInputs(MF);
2865   DEBUG(dbgs() << "----STRUCTURIZER START----\n");
2866   DEBUG(MF.dump());
2867 
2868   Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo());
2869   DEBUG(Regions->dump());
2870 
2871   RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI);
2872   setRegionMRT(RTree);
2873   initializeSelectRegisters(RTree, 0, MRI, TII);
2874   DEBUG(RTree->dump(TRI));
2875   bool result = structurizeRegions(RTree, true);
2876   delete RTree;
2877   DEBUG(dbgs() << "----STRUCTURIZER END----\n");
2878   initFallthroughMap(MF);
2879   return result;
2880 }
2881 
2882 FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() {
2883   return new AMDGPUMachineCFGStructurizer();
2884 }
2885