1 //===-- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. -----===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements the machine instruction level CFG structurizer pass.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "AMDGPU.h"
15 #include "SIInstrInfo.h"
16 #include "AMDGPUSubtarget.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/PostOrderIterator.h"
19 #include "llvm/ADT/SetVector.h"
20 #include "llvm/ADT/SmallPtrSet.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/Analysis/CFG.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineFunctionPass.h"
25 #include "llvm/CodeGen/MachineInstr.h"
26 #include "llvm/CodeGen/MachineInstrBuilder.h"
27 #include "llvm/CodeGen/MachineRegionInfo.h"
28 #include "llvm/CodeGen/MachineRegisterInfo.h"
29 #include "llvm/CodeGen/Passes.h"
30 #include "llvm/IR/DebugLoc.h"
31 #include "llvm/Support/Debug.h"
32 #include "llvm/Target/TargetInstrInfo.h"
33 #include "llvm/Target/TargetLowering.h"
34 #include "llvm/Target/TargetSubtargetInfo.h"
35 #include <tuple>
36 using namespace llvm;
37 
38 #define DEBUG_TYPE "amdgpucfgstructurizer"
39 
40 namespace {
41 class PHILinearizeDestIterator;
42 
43 class PHILinearize {
44   friend class PHILinearizeDestIterator;
45 
46 public:
47   typedef std::pair<unsigned, MachineBasicBlock *> PHISourceT;
48 
49 private:
50   typedef DenseSet<PHISourceT> PHISourcesT;
51   typedef struct {
52     unsigned DestReg;
53     DebugLoc DL;
54     PHISourcesT Sources;
55   } PHIInfoElementT;
56   typedef SmallPtrSet<PHIInfoElementT *, 2> PHIInfoT;
57   PHIInfoT PHIInfo;
58 
59   static unsigned phiInfoElementGetDest(PHIInfoElementT *Info);
60   static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef);
61   static DebugLoc phiInfoElementGetDebugLoc(PHIInfoElementT *Info);
62   static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info);
63   static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg,
64                                       MachineBasicBlock *SourceMBB);
65   static void phiInfoElementRemoveSource(PHIInfoElementT *Info,
66                                          unsigned SourceReg,
67                                          MachineBasicBlock *SourceMBB);
68   PHIInfoElementT *findPHIInfoElement(unsigned DestReg);
69   PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg,
70                                                 MachineBasicBlock *SourceMBB);
71 
72 public:
73   bool findSourcesFromMBB(MachineBasicBlock *SourceMBB,
74                           SmallVector<unsigned, 4> &Sources);
75   void addDest(unsigned DestReg, const DebugLoc &DL);
76   void replaceDef(unsigned OldDestReg, unsigned NewDestReg);
77   void deleteDef(unsigned DestReg);
78   DebugLoc getDebugLoc(unsigned DestReg);
79   void addSource(unsigned DestReg, unsigned SourceReg,
80                  MachineBasicBlock *SourceMBB);
81   void removeSource(unsigned DestReg, unsigned SourceReg,
82                     MachineBasicBlock *SourceMBB = nullptr);
83   bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
84                 unsigned &DestReg);
85   bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr);
86   unsigned getNumSources(unsigned DestReg);
87   void dump(MachineRegisterInfo *MRI);
88   void clear();
89 
90   typedef PHISourcesT::iterator source_iterator;
91   typedef PHILinearizeDestIterator dest_iterator;
92 
93   dest_iterator dests_begin();
94   dest_iterator dests_end();
95 
96   source_iterator sources_begin(unsigned Reg);
97   source_iterator sources_end(unsigned Reg);
98 };
99 
100 class PHILinearizeDestIterator {
101 private:
102   PHILinearize::PHIInfoT::iterator Iter;
103 
104 public:
105   unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); }
106   PHILinearizeDestIterator &operator++() {
107     ++Iter;
108     return *this;
109   }
110   bool operator==(const PHILinearizeDestIterator &I) const {
111     return I.Iter == Iter;
112   }
113   bool operator!=(const PHILinearizeDestIterator &I) const {
114     return I.Iter != Iter;
115   }
116 
117   PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {}
118 };
119 
120 unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) {
121   return Info->DestReg;
122 }
123 
124 void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info,
125                                         unsigned NewDef) {
126   Info->DestReg = NewDef;
127 }
128 
129 DebugLoc PHILinearize::phiInfoElementGetDebugLoc(PHIInfoElementT *Info) {
130   return Info->DL;
131 }
132 
133 PHILinearize::PHISourcesT &
134 PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) {
135   return Info->Sources;
136 }
137 
138 void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info,
139                                            unsigned SourceReg,
140                                            MachineBasicBlock *SourceMBB) {
141   // Assertion ensures we don't use the same SourceMBB for the
142   // sources, because we cannot have different registers with
143   // identical predecessors, but we can have the same register for
144   // multiple predecessors.
145   for (auto SI : phiInfoElementGetSources(Info)) {
146     assert((SI.second != SourceMBB || SourceReg == SI.first));
147   }
148 
149   phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB));
150 }
151 
152 void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info,
153                                               unsigned SourceReg,
154                                               MachineBasicBlock *SourceMBB) {
155   auto &Sources = phiInfoElementGetSources(Info);
156   SmallVector<PHISourceT, 4> ElimiatedSources;
157   for (auto SI : Sources) {
158     if (SI.first == SourceReg &&
159         (SI.second == nullptr || SI.second == SourceMBB)) {
160       ElimiatedSources.push_back(PHISourceT(SI.first, SI.second));
161     }
162   }
163 
164   for (auto &Source : ElimiatedSources) {
165     Sources.erase(Source);
166   }
167 }
168 
169 PHILinearize::PHIInfoElementT *
170 PHILinearize::findPHIInfoElement(unsigned DestReg) {
171   for (auto I : PHIInfo) {
172     if (phiInfoElementGetDest(I) == DestReg) {
173       return I;
174     }
175   }
176   return nullptr;
177 }
178 
179 PHILinearize::PHIInfoElementT *
180 PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg,
181                                            MachineBasicBlock *SourceMBB) {
182   for (auto I : PHIInfo) {
183     for (auto SI : phiInfoElementGetSources(I)) {
184       if (SI.first == SourceReg &&
185           (SI.second == nullptr || SI.second == SourceMBB)) {
186         return I;
187       }
188     }
189   }
190   return nullptr;
191 }
192 
193 bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB,
194                                       SmallVector<unsigned, 4> &Sources) {
195   bool FoundSource = false;
196   for (auto I : PHIInfo) {
197     for (auto SI : phiInfoElementGetSources(I)) {
198       if (SI.second == SourceMBB) {
199         FoundSource = true;
200         Sources.push_back(SI.first);
201       }
202     }
203   }
204   return FoundSource;
205 }
206 
207 void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) {
208   assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists");
209   PHISourcesT EmptySet;
210   PHIInfoElementT *NewElement = new PHIInfoElementT();
211   NewElement->DestReg = DestReg;
212   NewElement->DL = DL;
213   NewElement->Sources = EmptySet;
214   PHIInfo.insert(NewElement);
215 }
216 
217 void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) {
218   phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg);
219 }
220 
221 void PHILinearize::deleteDef(unsigned DestReg) {
222   PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg);
223   PHIInfo.erase(InfoElement);
224   delete InfoElement;
225 }
226 
227 DebugLoc PHILinearize::getDebugLoc(unsigned DestReg) {
228   return phiInfoElementGetDebugLoc(findPHIInfoElement(DestReg));
229 }
230 
231 void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg,
232                              MachineBasicBlock *SourceMBB) {
233   phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
234 }
235 
236 void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg,
237                                 MachineBasicBlock *SourceMBB) {
238   phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB);
239 }
240 
241 bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB,
242                             unsigned &DestReg) {
243   PHIInfoElementT *InfoElement =
244       findPHIInfoElementFromSource(SourceReg, SourceMBB);
245   if (InfoElement != nullptr) {
246     DestReg = phiInfoElementGetDest(InfoElement);
247     return true;
248   }
249   return false;
250 }
251 
252 bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) {
253   unsigned DestReg;
254   return findDest(Reg, SourceMBB, DestReg);
255 }
256 
257 unsigned PHILinearize::getNumSources(unsigned DestReg) {
258   return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size();
259 }
260 
261 void PHILinearize::dump(MachineRegisterInfo *MRI) {
262   const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
263   DEBUG(dbgs() << "=PHIInfo Start=\n");
264   for (auto PII : this->PHIInfo) {
265     PHIInfoElementT &Element = *PII;
266     DEBUG(dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI)
267                  << " Sources: {");
268     for (auto &SI : Element.Sources) {
269       DEBUG(dbgs() << PrintReg(SI.first, TRI) << "(BB#"
270                    << SI.second->getNumber() << "),");
271     }
272     DEBUG(dbgs() << "}\n");
273   }
274   DEBUG(dbgs() << "=PHIInfo End=\n");
275 }
276 
277 void PHILinearize::clear() { PHIInfo = PHIInfoT(); }
278 
279 PHILinearize::dest_iterator PHILinearize::dests_begin() {
280   return PHILinearizeDestIterator(PHIInfo.begin());
281 }
282 
283 PHILinearize::dest_iterator PHILinearize::dests_end() {
284   return PHILinearizeDestIterator(PHIInfo.end());
285 }
286 
287 PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) {
288   auto InfoElement = findPHIInfoElement(Reg);
289   return phiInfoElementGetSources(InfoElement).begin();
290 }
291 PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) {
292   auto InfoElement = findPHIInfoElement(Reg);
293   return phiInfoElementGetSources(InfoElement).end();
294 }
295 
296 class RegionMRT;
297 class MBBMRT;
298 
299 static unsigned getPHINumInputs(MachineInstr &PHI) {
300   assert(PHI.isPHI());
301   return (PHI.getNumOperands() - 1) / 2;
302 }
303 
304 static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) {
305   assert(PHI.isPHI());
306   return PHI.getOperand(Index * 2 + 2).getMBB();
307 }
308 
309 static void setPhiPred(MachineInstr &PHI, unsigned Index,
310                        MachineBasicBlock *NewPred) {
311   PHI.getOperand(Index * 2 + 2).setMBB(NewPred);
312 }
313 
314 static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) {
315   assert(PHI.isPHI());
316   return PHI.getOperand(Index * 2 + 1).getReg();
317 }
318 
319 static unsigned getPHIDestReg(MachineInstr &PHI) {
320   assert(PHI.isPHI());
321   return PHI.getOperand(0).getReg();
322 }
323 
324 class LinearizedRegion {
325 protected:
326   MachineBasicBlock *Entry;
327   // The exit block is part of the region, and is the last
328   // merge block before exiting the region.
329   MachineBasicBlock *Exit;
330   DenseSet<unsigned> LiveOuts;
331   SmallPtrSet<MachineBasicBlock *, 1> MBBs;
332   bool HasLoop;
333   LinearizedRegion *Parent;
334   RegionMRT *RMRT;
335 
336   void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
337                        MachineInstr *DefInstr, const MachineRegisterInfo *MRI,
338                        const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
339 
340   void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
341                              MachineInstr *DefInstr,
342                              const MachineRegisterInfo *MRI,
343                              const TargetRegisterInfo *TRI,
344                              PHILinearize &PHIInfo);
345 
346   void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
347                         const TargetRegisterInfo *TRI, PHILinearize &PHIInfo,
348                         RegionMRT *TopRegion);
349 
350   void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
351                      const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
352 
353   void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI,
354                      const TargetRegisterInfo *TRI, PHILinearize &PHIInfo,
355                      RegionMRT *TopRegion = nullptr);
356 
357 public:
358   void setRegionMRT(RegionMRT *Region) { RMRT = Region; }
359 
360   RegionMRT *getRegionMRT() { return RMRT; }
361 
362   void setParent(LinearizedRegion *P) { Parent = P; }
363 
364   LinearizedRegion *getParent() { return Parent; }
365 
366   void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr);
367 
368   void setBBSelectRegIn(unsigned Reg);
369 
370   unsigned getBBSelectRegIn();
371 
372   void setBBSelectRegOut(unsigned Reg, bool IsLiveOut);
373 
374   unsigned getBBSelectRegOut();
375 
376   void setHasLoop(bool Value);
377 
378   bool getHasLoop();
379 
380   void addLiveOut(unsigned VReg);
381 
382   void addLiveOuts(LinearizedRegion *LRegion);
383 
384   void removeLiveOut(unsigned Reg);
385 
386   void replaceLiveOut(unsigned OldReg, unsigned NewReg);
387 
388   void replaceRegister(unsigned Register, unsigned NewRegister,
389                        MachineRegisterInfo *MRI, bool ReplaceInside,
390                        bool ReplaceOutside, bool IncludeLoopPHIs);
391 
392   void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister,
393                                    bool IncludeLoopPHIs,
394                                    MachineRegisterInfo *MRI);
395 
396   void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister,
397                                     bool IncludeLoopPHIs,
398                                     MachineRegisterInfo *MRI);
399 
400   DenseSet<unsigned> *getLiveOuts();
401 
402   void setEntry(MachineBasicBlock *NewEntry);
403 
404   MachineBasicBlock *getEntry();
405 
406   void setExit(MachineBasicBlock *NewExit);
407 
408   MachineBasicBlock *getExit();
409 
410   void addMBB(MachineBasicBlock *MBB);
411 
412   void addMBBs(LinearizedRegion *InnerRegion);
413 
414   bool contains(MachineBasicBlock *MBB);
415 
416   bool isLiveOut(unsigned Reg);
417 
418   bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI);
419 
420   bool isDefinedInRegion(unsigned Reg, MachineRegisterInfo *MRI);
421 
422   void removeFalseRegisterKills(MachineRegisterInfo *MRI);
423 
424   void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI,
425                    const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
426 
427   LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI,
428                    const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
429 
430   LinearizedRegion(RegionMRT *Region, const MachineRegisterInfo *MRI,
431                    const TargetRegisterInfo *TRI, PHILinearize &PHIInfo);
432 
433   LinearizedRegion();
434 
435   ~LinearizedRegion();
436 };
437 
438 class MRT {
439 protected:
440   RegionMRT *Parent;
441   unsigned BBSelectRegIn;
442   unsigned BBSelectRegOut;
443 
444 public:
445   unsigned getBBSelectRegIn() { return BBSelectRegIn; }
446 
447   unsigned getBBSelectRegOut() { return BBSelectRegOut; }
448 
449   void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; }
450 
451   void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; }
452 
453   virtual RegionMRT *getRegionMRT() { return nullptr; }
454 
455   virtual MBBMRT *getMBBMRT() { return nullptr; }
456 
457   bool isRegion() { return getRegionMRT() != nullptr; }
458 
459   bool isMBB() { return getMBBMRT() != nullptr; }
460 
461   bool isRoot() { return Parent == nullptr; }
462 
463   void setParent(RegionMRT *Region) { Parent = Region; }
464 
465   RegionMRT *getParent() { return Parent; }
466 
467   static MachineBasicBlock *
468   initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
469                 DenseMap<MachineRegion *, RegionMRT *> &RegionMap);
470 
471   static RegionMRT *buildMRT(MachineFunction &MF,
472                              const MachineRegionInfo *RegionInfo,
473                              const SIInstrInfo *TII,
474                              MachineRegisterInfo *MRI);
475 
476   virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0;
477 
478   void dumpDepth(int depth) {
479     for (int i = depth; i > 0; --i) {
480       dbgs() << "  ";
481     }
482   }
483 
484   virtual ~MRT() {}
485 };
486 
487 class MBBMRT : public MRT {
488   MachineBasicBlock *MBB;
489 
490 public:
491   virtual MBBMRT *getMBBMRT() { return this; }
492 
493   MachineBasicBlock *getMBB() { return MBB; }
494 
495   virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) {
496     dumpDepth(depth);
497     dbgs() << "MBB: " << getMBB()->getNumber();
498     dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI);
499     dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n";
500   }
501 
502   MBBMRT(MachineBasicBlock *BB) : MBB(BB) {
503     setParent(nullptr);
504     setBBSelectRegOut(0);
505     setBBSelectRegIn(0);
506   }
507 };
508 
509 class RegionMRT : public MRT {
510 protected:
511   MachineRegion *Region;
512   LinearizedRegion *LRegion;
513   MachineBasicBlock *Succ;
514 
515   SetVector<MRT *> Children;
516 
517 public:
518   virtual RegionMRT *getRegionMRT() { return this; }
519 
520   void setLinearizedRegion(LinearizedRegion *LinearizeRegion) {
521     LRegion = LinearizeRegion;
522   }
523 
524   LinearizedRegion *getLinearizedRegion() { return LRegion; }
525 
526   MachineRegion *getMachineRegion() { return Region; }
527 
528   unsigned getInnerOutputRegister() {
529     return (*(Children.begin()))->getBBSelectRegOut();
530   }
531 
532   void addChild(MRT *Tree) { Children.insert(Tree); }
533 
534   SetVector<MRT *> *getChildren() { return &Children; }
535 
536   virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) {
537     dumpDepth(depth);
538     dbgs() << "Region: " << (void *)Region;
539     dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI);
540     dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n";
541 
542     dumpDepth(depth);
543     if (getSucc())
544       dbgs() << "Succ: " << getSucc()->getNumber() << "\n";
545     else
546       dbgs() << "Succ: none \n";
547     for (auto MRTI : Children) {
548       MRTI->dump(TRI, depth + 1);
549     }
550   }
551 
552   MRT *getEntryTree() { return Children.back(); }
553 
554   MRT *getExitTree() { return Children.front(); }
555 
556   MachineBasicBlock *getEntry() {
557     MRT *Tree = Children.back();
558     return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry()
559                               : Tree->getMBBMRT()->getMBB();
560   }
561 
562   MachineBasicBlock *getExit() {
563     MRT *Tree = Children.front();
564     return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit()
565                               : Tree->getMBBMRT()->getMBB();
566   }
567 
568   void setSucc(MachineBasicBlock *MBB) { Succ = MBB; }
569 
570   MachineBasicBlock *getSucc() { return Succ; }
571 
572   bool contains(MachineBasicBlock *MBB) {
573     for (auto CI : Children) {
574       if (CI->isMBB()) {
575         if (MBB == CI->getMBBMRT()->getMBB()) {
576           return true;
577         }
578       } else {
579         if (CI->getRegionMRT()->contains(MBB)) {
580           return true;
581         } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr &&
582                    CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) {
583           return true;
584         }
585       }
586     }
587     return false;
588   }
589 
590   void replaceLiveOutReg(unsigned Register, unsigned NewRegister) {
591     LinearizedRegion *LRegion = getLinearizedRegion();
592     LRegion->replaceLiveOut(Register, NewRegister);
593     for (auto &CI : Children) {
594       if (CI->isRegion()) {
595         CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
596       }
597     }
598   }
599 
600   RegionMRT(MachineRegion *MachineRegion)
601       : Region(MachineRegion), LRegion(nullptr), Succ(nullptr) {
602     setParent(nullptr);
603     setBBSelectRegOut(0);
604     setBBSelectRegIn(0);
605   }
606 
607   virtual ~RegionMRT() {
608     if (LRegion) {
609       delete LRegion;
610     }
611 
612     for (auto CI : Children) {
613       delete &(*CI);
614     }
615   }
616 };
617 
618 static unsigned createBBSelectReg(const SIInstrInfo *TII,
619                                   MachineRegisterInfo *MRI) {
620   return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32));
621 }
622 
623 MachineBasicBlock *
624 MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo,
625                    DenseMap<MachineRegion *, RegionMRT *> &RegionMap) {
626   for (auto &MFI : MF) {
627     MachineBasicBlock *ExitMBB = &MFI;
628     if (ExitMBB->succ_size() == 0) {
629       return ExitMBB;
630     }
631   }
632   llvm_unreachable("CFG has no exit block");
633   return nullptr;
634 }
635 
636 RegionMRT *MRT::buildMRT(MachineFunction &MF,
637                          const MachineRegionInfo *RegionInfo,
638                          const SIInstrInfo *TII, MachineRegisterInfo *MRI) {
639   SmallPtrSet<MachineRegion *, 4> PlacedRegions;
640   DenseMap<MachineRegion *, RegionMRT *> RegionMap;
641   MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion();
642   RegionMRT *Result = new RegionMRT(TopLevelRegion);
643   RegionMap[TopLevelRegion] = Result;
644 
645   // Insert the exit block first, we need it to be the merge node
646   // for the top level region.
647   MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap);
648 
649   unsigned BBSelectRegIn = createBBSelectReg(TII, MRI);
650   MBBMRT *ExitMRT = new MBBMRT(Exit);
651   RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT);
652   ExitMRT->setBBSelectRegIn(BBSelectRegIn);
653 
654   for (auto MBBI : post_order(&(MF.front()))) {
655     MachineBasicBlock *MBB = &(*MBBI);
656 
657     // Skip Exit since we already added it
658     if (MBB == Exit) {
659       continue;
660     }
661 
662     DEBUG(dbgs() << "Visiting BB#" << MBB->getNumber() << "\n");
663     MBBMRT *NewMBB = new MBBMRT(MBB);
664     MachineRegion *Region = RegionInfo->getRegionFor(MBB);
665 
666     // Ensure we have the MRT region
667     if (RegionMap.count(Region) == 0) {
668       RegionMRT *NewMRTRegion = new RegionMRT(Region);
669       RegionMap[Region] = NewMRTRegion;
670 
671       // Ensure all parents are in the RegionMap
672       MachineRegion *Parent = Region->getParent();
673       while (RegionMap.count(Parent) == 0) {
674         RegionMRT *NewMRTParent = new RegionMRT(Parent);
675         NewMRTParent->addChild(NewMRTRegion);
676         NewMRTRegion->setParent(NewMRTParent);
677         RegionMap[Parent] = NewMRTParent;
678         NewMRTRegion = NewMRTParent;
679         Parent = Parent->getParent();
680       }
681       RegionMap[Parent]->addChild(NewMRTRegion);
682       NewMRTRegion->setParent(RegionMap[Parent]);
683     }
684 
685     // Add MBB to Region MRT
686     RegionMap[Region]->addChild(NewMBB);
687     NewMBB->setParent(RegionMap[Region]);
688     RegionMap[Region]->setSucc(Region->getExit());
689   }
690   return Result;
691 }
692 
693 void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg,
694                                        MachineInstr *DefInstr,
695                                        const MachineRegisterInfo *MRI,
696                                        const TargetRegisterInfo *TRI,
697                                        PHILinearize &PHIInfo) {
698   if (TRI->isVirtualRegister(Reg)) {
699     DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n");
700     // If this is a source register to a PHI we are chaining, it
701     // must be live out.
702     if (PHIInfo.isSource(Reg)) {
703       DEBUG(dbgs() << "Add LiveOut (PHI): " << PrintReg(Reg, TRI) << "\n");
704       addLiveOut(Reg);
705     } else {
706       // If this is live out of the MBB
707       for (auto &UI : MRI->use_operands(Reg)) {
708         if (UI.getParent()->getParent() != MBB) {
709           DEBUG(dbgs() << "Add LiveOut (MBB BB#" << MBB->getNumber()
710                        << "): " << PrintReg(Reg, TRI) << "\n");
711           addLiveOut(Reg);
712         } else {
713           // If the use is in the same MBB we have to make sure
714           // it is after the def, otherwise it is live out in a loop
715           MachineInstr *UseInstr = UI.getParent();
716           for (MachineBasicBlock::instr_iterator
717                    MII = UseInstr->getIterator(),
718                    MIE = UseInstr->getParent()->instr_end();
719                MII != MIE; ++MII) {
720             if ((&(*MII)) == DefInstr) {
721               DEBUG(dbgs() << "Add LiveOut (Loop): " << PrintReg(Reg, TRI)
722                            << "\n");
723               addLiveOut(Reg);
724             }
725           }
726         }
727       }
728     }
729   }
730 }
731 
732 void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg,
733                                              MachineInstr *DefInstr,
734                                              const MachineRegisterInfo *MRI,
735                                              const TargetRegisterInfo *TRI,
736                                              PHILinearize &PHIInfo) {
737   if (TRI->isVirtualRegister(Reg)) {
738     DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n");
739     for (auto &UI : MRI->use_operands(Reg)) {
740       if (!Region->contains(UI.getParent()->getParent())) {
741         DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region
742                      << "): " << PrintReg(Reg, TRI) << "\n");
743         addLiveOut(Reg);
744       }
745     }
746   }
747 }
748 
749 void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB,
750                                      const MachineRegisterInfo *MRI,
751                                      const TargetRegisterInfo *TRI,
752                                      PHILinearize &PHIInfo) {
753   DEBUG(dbgs() << "-Store Live Outs Begin (BB#" << MBB->getNumber() << ")-\n");
754   for (auto &II : *MBB) {
755     for (auto &RI : II.defs()) {
756       storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo);
757     }
758     for (auto &IRI : II.implicit_operands()) {
759       if (IRI.isDef()) {
760         storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo);
761       }
762     }
763   }
764 
765   // If we have a successor with a PHI, source coming from this MBB we have to
766   // add the register as live out
767   for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
768                                         E = MBB->succ_end();
769        SI != E; ++SI) {
770     for (auto &II : *(*SI)) {
771       if (II.isPHI()) {
772         MachineInstr &PHI = II;
773         int numPreds = getPHINumInputs(PHI);
774         for (int i = 0; i < numPreds; ++i) {
775           if (getPHIPred(PHI, i) == MBB) {
776             unsigned PHIReg = getPHISourceReg(PHI, i);
777             DEBUG(dbgs() << "Add LiveOut (PhiSource BB#" << MBB->getNumber()
778                          << " -> BB#" << (*SI)->getNumber()
779                          << "): " << PrintReg(PHIReg, TRI) << "\n");
780             addLiveOut(PHIReg);
781           }
782         }
783       }
784     }
785   }
786 
787   DEBUG(dbgs() << "-Store Live Outs Endn-\n");
788 }
789 
790 void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB,
791                                         const MachineRegisterInfo *MRI,
792                                         const TargetRegisterInfo *TRI,
793                                         PHILinearize &PHIInfo,
794                                         RegionMRT *TopRegion) {
795   for (auto &II : *MBB) {
796     for (auto &RI : II.defs()) {
797       storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI,
798                             PHIInfo);
799     }
800     for (auto &IRI : II.implicit_operands()) {
801       if (IRI.isDef()) {
802         storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI,
803                               TRI, PHIInfo);
804       }
805     }
806   }
807 }
808 
809 void LinearizedRegion::storeLiveOuts(RegionMRT *Region,
810                                      const MachineRegisterInfo *MRI,
811                                      const TargetRegisterInfo *TRI,
812                                      PHILinearize &PHIInfo,
813                                      RegionMRT *CurrentTopRegion) {
814   MachineBasicBlock *Exit = Region->getSucc();
815 
816   RegionMRT *TopRegion =
817       CurrentTopRegion == nullptr ? Region : CurrentTopRegion;
818 
819   // Check if exit is end of function, if so, no live outs.
820   if (Exit == nullptr)
821     return;
822 
823   auto Children = Region->getChildren();
824   for (auto CI : *Children) {
825     if (CI->isMBB()) {
826       auto MBB = CI->getMBBMRT()->getMBB();
827       storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion);
828     } else {
829       LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion();
830       // We should be limited to only store registers that are live out from the
831       // lineaized region
832       for (auto MBBI : SubRegion->MBBs) {
833         storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion);
834       }
835     }
836   }
837 
838   if (CurrentTopRegion == nullptr) {
839     auto Succ = Region->getSucc();
840     for (auto &II : *Succ) {
841       if (II.isPHI()) {
842         MachineInstr &PHI = II;
843         int numPreds = getPHINumInputs(PHI);
844         for (int i = 0; i < numPreds; ++i) {
845           if (Region->contains(getPHIPred(PHI, i))) {
846             unsigned PHIReg = getPHISourceReg(PHI, i);
847             DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region
848                          << "): " << PrintReg(PHIReg, TRI) << "\n");
849             addLiveOut(PHIReg);
850           }
851         }
852       }
853     }
854   }
855 }
856 
857 void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) {
858   OS << "Linearized Region {";
859   bool IsFirst = true;
860   for (const auto &MBB : MBBs) {
861     if (IsFirst) {
862       IsFirst = false;
863     } else {
864       OS << " ,";
865     }
866     OS << MBB->getNumber();
867   }
868   OS << "} (" << Entry->getNumber() << ", "
869      << (Exit == nullptr ? -1 : Exit->getNumber())
870      << "): In:" << PrintReg(getBBSelectRegIn(), TRI)
871      << " Out:" << PrintReg(getBBSelectRegOut(), TRI) << " {";
872   for (auto &LI : LiveOuts) {
873     OS << PrintReg(LI, TRI) << " ";
874   }
875   OS << "} \n";
876 }
877 
878 unsigned LinearizedRegion::getBBSelectRegIn() {
879   return getRegionMRT()->getBBSelectRegIn();
880 }
881 
882 unsigned LinearizedRegion::getBBSelectRegOut() {
883   return getRegionMRT()->getBBSelectRegOut();
884 }
885 
886 void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; }
887 
888 bool LinearizedRegion::getHasLoop() { return HasLoop; }
889 
890 void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); }
891 
892 void LinearizedRegion::addLiveOuts(LinearizedRegion *LRegion) {
893   DenseSet<unsigned> *RegionLiveOuts = LRegion->getLiveOuts();
894   for (auto R : *RegionLiveOuts) {
895     addLiveOut(R);
896   }
897 }
898 
899 void LinearizedRegion::removeLiveOut(unsigned Reg) {
900   if (isLiveOut(Reg))
901     LiveOuts.erase(Reg);
902 }
903 
904 void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) {
905   if (isLiveOut(OldReg)) {
906     removeLiveOut(OldReg);
907     addLiveOut(NewReg);
908   }
909 }
910 
911 void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister,
912                                        MachineRegisterInfo *MRI,
913                                        bool ReplaceInside, bool ReplaceOutside,
914                                        bool IncludeLoopPHI) {
915   assert(Register != NewRegister && "Cannot replace a reg with itself");
916 
917   DEBUG(dbgs() << "Pepareing to replace register (region): "
918                << PrintReg(Register, MRI->getTargetRegisterInfo()) << " with "
919                << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n");
920 
921   // If we are replacing outside, we also need to update the LiveOuts
922   if (ReplaceOutside &&
923       (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) {
924     LinearizedRegion *Current = this;
925     while (Current != nullptr && Current->getEntry() != nullptr) {
926       DEBUG(dbgs() << "Region before register replace\n");
927       DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
928       Current->replaceLiveOut(Register, NewRegister);
929       DEBUG(dbgs() << "Region after register replace\n");
930       DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo()));
931       Current = Current->getParent();
932     }
933   }
934 
935   for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
936                                          E = MRI->reg_end();
937        I != E;) {
938     MachineOperand &O = *I;
939     ++I;
940 
941     // We don't rewrite defs.
942     if (O.isDef())
943       continue;
944 
945     bool IsInside = contains(O.getParent()->getParent());
946     bool IsLoopPHI = IsInside && (O.getParent()->isPHI() &&
947                                   O.getParent()->getParent() == getEntry());
948     bool ShouldReplace = (IsInside && ReplaceInside) ||
949                          (!IsInside && ReplaceOutside) ||
950                          (IncludeLoopPHI && IsLoopPHI);
951     if (ShouldReplace) {
952 
953       if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
954         DEBUG(dbgs() << "Trying to substitute physical register: "
955                      << PrintReg(NewRegister, MRI->getTargetRegisterInfo())
956                      << "\n");
957         llvm_unreachable("Cannot substitute physical registers");
958       } else {
959         DEBUG(dbgs() << "Replacing register (region): "
960                      << PrintReg(Register, MRI->getTargetRegisterInfo())
961                      << " with "
962                      << PrintReg(NewRegister, MRI->getTargetRegisterInfo())
963                      << "\n");
964         O.setReg(NewRegister);
965       }
966     }
967   }
968 }
969 
970 void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register,
971                                                    unsigned NewRegister,
972                                                    bool IncludeLoopPHIs,
973                                                    MachineRegisterInfo *MRI) {
974   replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs);
975 }
976 
977 void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register,
978                                                     unsigned NewRegister,
979                                                     bool IncludeLoopPHIs,
980                                                     MachineRegisterInfo *MRI) {
981   replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs);
982 }
983 
984 DenseSet<unsigned> *LinearizedRegion::getLiveOuts() { return &LiveOuts; }
985 
986 void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) {
987   Entry = NewEntry;
988 }
989 
990 MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; }
991 
992 void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; }
993 
994 MachineBasicBlock *LinearizedRegion::getExit() { return Exit; }
995 
996 void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); }
997 
998 void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) {
999   for (const auto &MBB : InnerRegion->MBBs) {
1000     addMBB(MBB);
1001   }
1002 }
1003 
1004 bool LinearizedRegion::contains(MachineBasicBlock *MBB) {
1005   return MBBs.count(MBB) == 1;
1006 }
1007 
1008 bool LinearizedRegion::isLiveOut(unsigned Reg) {
1009   return LiveOuts.count(Reg) == 1;
1010 }
1011 
1012 bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) {
1013   return MRI->def_begin(Reg) == MRI->def_end();
1014 }
1015 
1016 bool LinearizedRegion::isDefinedInRegion(unsigned Reg,
1017                                          MachineRegisterInfo *MRI) {
1018   bool NoDef = hasNoDef(Reg, MRI);
1019   if (NoDef) {
1020     return false;
1021   }
1022 
1023   if (!MRI->hasOneDef(Reg)) {
1024     DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo())
1025                  << " has multiple defs\n");
1026   }
1027 
1028   assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
1029   MachineOperand *Def = &(*(MRI->def_begin(Reg)));
1030   return contains(Def->getParent()->getParent());
1031 }
1032 
1033 // After the code has been structurized, what was flagged as kills
1034 // before are no longer register kills.
1035 void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) {
1036   const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo();
1037   for (auto MBBI : MBBs) {
1038     MachineBasicBlock *MBB = MBBI;
1039     for (auto &II : *MBB) {
1040       for (auto &RI : II.uses()) {
1041         if (RI.isReg()) {
1042           unsigned Reg = RI.getReg();
1043           if (TRI->isVirtualRegister(Reg)) {
1044             if (hasNoDef(Reg, MRI))
1045               continue;
1046             if (!MRI->hasOneDef(Reg)) {
1047               DEBUG(this->getEntry()->getParent()->dump());
1048               DEBUG(dbgs() << PrintReg(Reg, TRI) << "\n");
1049             }
1050 
1051             if (MRI->def_begin(Reg) == MRI->def_end()) {
1052               DEBUG(dbgs() << "Register "
1053                            << PrintReg(Reg, MRI->getTargetRegisterInfo())
1054                            << " has NO defs\n");
1055             } else if (!MRI->hasOneDef(Reg)) {
1056               DEBUG(dbgs() << "Register "
1057                            << PrintReg(Reg, MRI->getTargetRegisterInfo())
1058                            << " has multiple defs\n");
1059             }
1060 
1061             assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
1062             MachineOperand *Def = &(*(MRI->def_begin(Reg)));
1063             MachineOperand *UseOperand = &(RI);
1064             bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB;
1065             if (UseIsOutsideDefMBB && UseOperand->isKill()) {
1066               DEBUG(dbgs() << "Removing kill flag on register: "
1067                            << PrintReg(Reg, TRI) << "\n");
1068               UseOperand->setIsKill(false);
1069             }
1070           }
1071         }
1072       }
1073     }
1074   }
1075 }
1076 
1077 void LinearizedRegion::initLiveOut(RegionMRT *Region,
1078                                    const MachineRegisterInfo *MRI,
1079                                    const TargetRegisterInfo *TRI,
1080                                    PHILinearize &PHIInfo) {
1081   storeLiveOuts(Region, MRI, TRI, PHIInfo);
1082 }
1083 
1084 LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB,
1085                                    const MachineRegisterInfo *MRI,
1086                                    const TargetRegisterInfo *TRI,
1087                                    PHILinearize &PHIInfo) {
1088   setEntry(MBB);
1089   setExit(MBB);
1090   storeLiveOuts(MBB, MRI, TRI, PHIInfo);
1091   MBBs.insert(MBB);
1092   Parent = nullptr;
1093 }
1094 
1095 LinearizedRegion::LinearizedRegion(RegionMRT *Region,
1096                                    const MachineRegisterInfo *MRI,
1097                                    const TargetRegisterInfo *TRI,
1098                                    PHILinearize &PHIInfo) {
1099   setEntry(Region->getEntry());
1100   // We don't have a single exit block that is part of the region
1101   // at this point. When the transform is performed this block
1102   // will be created.
1103   setExit(nullptr);
1104   storeLiveOuts(Region, MRI, TRI, PHIInfo);
1105   Parent = nullptr;
1106 }
1107 
1108 LinearizedRegion::LinearizedRegion() {
1109   setEntry(nullptr);
1110   setExit(nullptr);
1111   Parent = nullptr;
1112 }
1113 
1114 LinearizedRegion::~LinearizedRegion() {}
1115 
1116 class AMDGPUMachineCFGStructurizer : public MachineFunctionPass {
1117 private:
1118   const MachineRegionInfo *Regions;
1119   const SIInstrInfo *TII;
1120   const TargetRegisterInfo *TRI;
1121   MachineRegisterInfo *MRI;
1122   unsigned BBSelectRegister;
1123   PHILinearize PHIInfo;
1124   DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap;
1125 
1126   void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI,
1127                            SmallVector<unsigned, 2> &RegionIndices);
1128   void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI,
1129                            SmallVector<unsigned, 2> &RegionIndices);
1130   void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI,
1131                               SmallVector<unsigned, 2> &PHINonRegionIndices);
1132 
1133   void storePHILinearizationInfoDest(
1134       unsigned LDestReg, MachineInstr &PHI,
1135       SmallVector<unsigned, 2> *RegionIndices = nullptr);
1136 
1137   unsigned storePHILinearizationInfo(MachineInstr &PHI,
1138                                      SmallVector<unsigned, 2> *RegionIndices);
1139 
1140   void extractKilledPHIs(MachineBasicBlock *MBB);
1141   void extractKilledPHIs(LinearizedRegion *LRegion);
1142 
1143   bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices,
1144                  unsigned *ReplaceReg);
1145 
1146   bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1147                  MachineBasicBlock *SourceMBB,
1148                  SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg);
1149 
1150   void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1151                   MachineBasicBlock *LastMerge,
1152                   SmallVector<unsigned, 2> &PHIRegionIndices);
1153   void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg,
1154                        MachineBasicBlock *IfMBB,
1155                        SmallVector<unsigned, 2> &PHIRegionIndices);
1156   void replaceLiveOutRegs(MachineInstr &PHI,
1157                           SmallVector<unsigned, 2> &PHIRegionIndices,
1158                           unsigned CombinedSourceReg,
1159                           LinearizedRegion *LRegion);
1160   void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge,
1161                             MachineInstr &PHI, LinearizedRegion *LRegion);
1162 
1163   void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge,
1164                              LinearizedRegion *LRegion);
1165   void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB,
1166                              MachineInstr &PHI);
1167   void rewriteRegionEntryPHIs(LinearizedRegion *Region,
1168                               MachineBasicBlock *IfMBB);
1169 
1170   bool regionIsSimpleIf(RegionMRT *Region);
1171 
1172   void transformSimpleIfRegion(RegionMRT *Region);
1173 
1174   bool regionIsSimpleLoop(RegionMRT *Region);
1175 
1176   void transformSimpleLoopRegion(RegionMRT *Region);
1177 
1178   void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II);
1179 
1180   void insertUnconditionalBranch(MachineBasicBlock *MBB,
1181                                  MachineBasicBlock *Dest,
1182                                  const DebugLoc &DL = DebugLoc());
1183 
1184   MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region);
1185 
1186   void replaceRegisterOutsideMBB(MachineBasicBlock *MBB, unsigned Register,
1187                                  unsigned NewRegister);
1188 
1189   void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1190                       MachineBasicBlock *MergeBB, unsigned DestRegister,
1191                       unsigned IfSourceRegister, unsigned CodeSourceRegister,
1192                       bool IsUndefIfSource = false);
1193 
1194   MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB,
1195                                    MachineBasicBlock *CodeBBStart,
1196                                    MachineBasicBlock *CodeBBEnd,
1197                                    MachineBasicBlock *SelectBB, unsigned IfReg,
1198                                    bool InheritPreds);
1199 
1200   void prunePHIInfo(MachineBasicBlock *MBB);
1201   void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg);
1202 
1203   void createEntryPHIs(LinearizedRegion *CurrentRegion);
1204   void resolvePHIInfos(MachineBasicBlock *FunctionEntry);
1205 
1206   void replaceRegisterWith(unsigned Register, unsigned NewRegister);
1207 
1208   MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB,
1209                                     MachineBasicBlock *CodeBB,
1210                                     LinearizedRegion *LRegion,
1211                                     unsigned BBSelectRegIn,
1212                                     unsigned BBSelectRegOut);
1213 
1214   MachineBasicBlock *
1215   createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion,
1216                  LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
1217                  unsigned BBSelectRegIn, unsigned BBSelectRegOut);
1218   void ensureCondIsNotKilled(SmallVector<MachineOperand, 1> Cond);
1219 
1220   void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
1221                                MachineBasicBlock *MergeBB,
1222                                unsigned BBSelectReg);
1223 
1224   MachineInstr *getDefInstr(unsigned Reg);
1225   void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1226                         MachineBasicBlock *MergeBB,
1227                         LinearizedRegion *InnerRegion, unsigned DestReg,
1228                         unsigned SourceReg);
1229   bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion,
1230                    unsigned Register);
1231   void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB,
1232                           MachineBasicBlock *MergeBB,
1233                           LinearizedRegion *InnerRegion,
1234                           LinearizedRegion *LRegion);
1235 
1236   void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry,
1237                     MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion);
1238   void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc,
1239                      LinearizedRegion *LRegion);
1240 
1241   MachineBasicBlock *splitExit(LinearizedRegion *LRegion);
1242 
1243   MachineBasicBlock *splitEntry(LinearizedRegion *LRegion);
1244 
1245   LinearizedRegion *initLinearizedRegion(RegionMRT *Region);
1246 
1247   bool structurizeComplexRegion(RegionMRT *Region);
1248 
1249   bool structurizeRegion(RegionMRT *Region);
1250 
1251   bool structurizeRegions(RegionMRT *Region, bool isTopRegion);
1252 
1253 public:
1254   static char ID;
1255 
1256   void getAnalysisUsage(AnalysisUsage &AU) const override {
1257     AU.addRequired<MachineRegionInfoPass>();
1258     MachineFunctionPass::getAnalysisUsage(AU);
1259   }
1260 
1261     AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) {
1262       initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry());
1263     }
1264 
1265   void initFallthroughMap(MachineFunction &MF);
1266 
1267   void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut);
1268 
1269   unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg,
1270                                      MachineRegisterInfo *MRI,
1271                                      const SIInstrInfo *TII);
1272 
1273   RegionMRT *RMRT;
1274   void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; }
1275 
1276   RegionMRT *getRegionMRT() { return RMRT; }
1277 
1278   bool runOnMachineFunction(MachineFunction &MF) override;
1279 };
1280 }
1281 
1282 char AMDGPUMachineCFGStructurizer::ID = 0;
1283 
1284 bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) {
1285   MachineBasicBlock *Entry = Region->getEntry();
1286   MachineBasicBlock *Succ = Region->getSucc();
1287   bool FoundBypass = false;
1288   bool FoundIf = false;
1289 
1290   if (Entry->succ_size() != 2) {
1291     return false;
1292   }
1293 
1294   for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(),
1295                                               E = Entry->succ_end();
1296        SI != E; ++SI) {
1297     MachineBasicBlock *Current = *SI;
1298 
1299     if (Current == Succ) {
1300       FoundBypass = true;
1301     } else if ((Current->succ_size() == 1) &&
1302                *(Current->succ_begin()) == Succ) {
1303       FoundIf = true;
1304     }
1305   }
1306 
1307   return FoundIf && FoundBypass;
1308 }
1309 
1310 void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) {
1311   MachineBasicBlock *Entry = Region->getEntry();
1312   MachineBasicBlock *Exit = Region->getExit();
1313   TII->convertNonUniformIfRegion(Entry, Exit);
1314 }
1315 
1316 bool AMDGPUMachineCFGStructurizer::regionIsSimpleLoop(RegionMRT *Region) {
1317   MachineBasicBlock *Entry = Region->getEntry();
1318 
1319   if (Entry->succ_size() != 1) {
1320     return false;
1321   }
1322 
1323   int NumRegionExitEdges = 0;
1324   MachineBasicBlock *BackBlock = nullptr;
1325   for (MachineBasicBlock::const_pred_iterator PI = Entry->succ_begin(),
1326                                               PE = Entry->succ_end();
1327        PI != PE; ++PI) {
1328     MachineBasicBlock *CurrentSP = *PI;
1329     if (Region->contains(CurrentSP)) {
1330       BackBlock = CurrentSP;
1331     }
1332   }
1333 
1334   bool HasBackedge = false;
1335 
1336   for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(),
1337                                               SE = Entry->succ_end();
1338        SI != SE; ++SI) {
1339     MachineBasicBlock *CurrentBS = *SI;
1340     if (CurrentBS == Entry) {
1341       HasBackedge = true;
1342     }
1343   }
1344 
1345   return NumRegionExitEdges == 1 && BackBlock->succ_size() == 2 && HasBackedge;
1346 }
1347 
1348 void AMDGPUMachineCFGStructurizer::transformSimpleLoopRegion(RegionMRT *Region) {
1349   MachineBasicBlock *Entry = Region->getEntry();
1350   MachineBasicBlock *BackBlock = nullptr;
1351   for (MachineBasicBlock::const_pred_iterator PI = Entry->succ_begin(),
1352                                               PE = Entry->succ_end();
1353        PI != PE; ++PI) {
1354     MachineBasicBlock *CurrentSP = *PI;
1355     if (Region->contains(CurrentSP)) {
1356       BackBlock = CurrentSP;
1357     }
1358   }
1359 
1360   TII->convertNonUniformLoopRegion(Entry, BackBlock);
1361 }
1362 
1363 static void fixMBBTerminator(MachineBasicBlock *MBB) {
1364 
1365   if (MBB->succ_size() == 1) {
1366     auto *Succ = *(MBB->succ_begin());
1367     for (auto &TI : MBB->terminators()) {
1368       for (auto &UI : TI.uses()) {
1369         if (UI.isMBB() && UI.getMBB() != Succ) {
1370           UI.setMBB(Succ);
1371         }
1372       }
1373     }
1374   }
1375 }
1376 
1377 static void fixRegionTerminator(RegionMRT *Region) {
1378   MachineBasicBlock *InternalSucc = nullptr;
1379   MachineBasicBlock *ExternalSucc = nullptr;
1380   LinearizedRegion *LRegion = Region->getLinearizedRegion();
1381   auto Exit = LRegion->getExit();
1382 
1383   SmallPtrSet<MachineBasicBlock *, 2> Successors;
1384   for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(),
1385                                               SE = Exit->succ_end();
1386        SI != SE; ++SI) {
1387     MachineBasicBlock *Succ = *SI;
1388     if (LRegion->contains(Succ)) {
1389       // Do not allow re-assign
1390       assert(InternalSucc == nullptr);
1391       InternalSucc = Succ;
1392     } else {
1393       // Do not allow re-assign
1394       assert(ExternalSucc == nullptr);
1395       ExternalSucc = Succ;
1396     }
1397   }
1398 
1399   for (auto &TI : Exit->terminators()) {
1400     for (auto &UI : TI.uses()) {
1401       if (UI.isMBB()) {
1402         auto Target = UI.getMBB();
1403         if (Target != InternalSucc && Target != ExternalSucc) {
1404           UI.setMBB(ExternalSucc);
1405         }
1406       }
1407     }
1408   }
1409 }
1410 
1411 // If a region region is just a sequence of regions (and the exit
1412 // block in the case of the top level region), we can simply skip
1413 // linearizing it, because it is already linear
1414 bool regionIsSequence(RegionMRT *Region) {
1415   auto Children = Region->getChildren();
1416   for (auto CI : *Children) {
1417     if (!CI->isRegion()) {
1418       if (CI->getMBBMRT()->getMBB()->succ_size() > 1) {
1419         return false;
1420       }
1421     }
1422   }
1423   return true;
1424 }
1425 
1426 void fixupRegionExits(RegionMRT *Region) {
1427   auto Children = Region->getChildren();
1428   for (auto CI : *Children) {
1429     if (!CI->isRegion()) {
1430       fixMBBTerminator(CI->getMBBMRT()->getMBB());
1431     } else {
1432       fixRegionTerminator(CI->getRegionMRT());
1433     }
1434   }
1435 }
1436 
1437 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
1438     RegionMRT *Region, MachineInstr &PHI,
1439     SmallVector<unsigned, 2> &PHIRegionIndices) {
1440   unsigned NumInputs = getPHINumInputs(PHI);
1441   for (unsigned i = 0; i < NumInputs; ++i) {
1442     MachineBasicBlock *Pred = getPHIPred(PHI, i);
1443     if (Region->contains(Pred)) {
1444       PHIRegionIndices.push_back(i);
1445     }
1446   }
1447 }
1448 
1449 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices(
1450     LinearizedRegion *Region, MachineInstr &PHI,
1451     SmallVector<unsigned, 2> &PHIRegionIndices) {
1452   unsigned NumInputs = getPHINumInputs(PHI);
1453   for (unsigned i = 0; i < NumInputs; ++i) {
1454     MachineBasicBlock *Pred = getPHIPred(PHI, i);
1455     if (Region->contains(Pred)) {
1456       PHIRegionIndices.push_back(i);
1457     }
1458   }
1459 }
1460 
1461 void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices(
1462     LinearizedRegion *Region, MachineInstr &PHI,
1463     SmallVector<unsigned, 2> &PHINonRegionIndices) {
1464   unsigned NumInputs = getPHINumInputs(PHI);
1465   for (unsigned i = 0; i < NumInputs; ++i) {
1466     MachineBasicBlock *Pred = getPHIPred(PHI, i);
1467     if (!Region->contains(Pred)) {
1468       PHINonRegionIndices.push_back(i);
1469     }
1470   }
1471 }
1472 
1473 void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest(
1474     unsigned LDestReg, MachineInstr &PHI,
1475     SmallVector<unsigned, 2> *RegionIndices) {
1476   if (RegionIndices) {
1477     for (auto i : *RegionIndices) {
1478       PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
1479     }
1480   } else {
1481     unsigned NumInputs = getPHINumInputs(PHI);
1482     for (unsigned i = 0; i < NumInputs; ++i) {
1483       PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i));
1484     }
1485   }
1486 }
1487 
1488 unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo(
1489     MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) {
1490   unsigned DestReg = getPHIDestReg(PHI);
1491   unsigned LinearizeDestReg =
1492       MRI->createVirtualRegister(MRI->getRegClass(DestReg));
1493   PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc());
1494   storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices);
1495   return LinearizeDestReg;
1496 }
1497 
1498 void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) {
1499   // We need to create a new chain for the killed phi, but there is no
1500   // need to do the renaming outside or inside the block.
1501   SmallPtrSet<MachineInstr *, 2> PHIs;
1502   for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(),
1503                                          E = MBB->instr_end();
1504        I != E; ++I) {
1505     MachineInstr &Instr = *I;
1506     if (Instr.isPHI()) {
1507       unsigned PHIDestReg = getPHIDestReg(Instr);
1508       DEBUG(dbgs() << "Extractking killed phi:\n");
1509       DEBUG(Instr.dump());
1510       PHIs.insert(&Instr);
1511       PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc());
1512       storePHILinearizationInfoDest(PHIDestReg, Instr);
1513     }
1514   }
1515 
1516   for (auto PI : PHIs) {
1517     PI->eraseFromParent();
1518   }
1519 }
1520 
1521 void AMDGPUMachineCFGStructurizer::extractKilledPHIs(LinearizedRegion *LRegion) {
1522   // PHIs can only exist in the entry block.
1523   extractKilledPHIs(LRegion->getEntry());
1524 }
1525 
1526 static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices,
1527                              unsigned Index) {
1528   for (auto i : PHIRegionIndices) {
1529     if (i == Index)
1530       return true;
1531   }
1532   return false;
1533 }
1534 
1535 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
1536                                        SmallVector<unsigned, 2> &PHIIndices,
1537                                        unsigned *ReplaceReg) {
1538   return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg);
1539 }
1540 
1541 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI,
1542                                        unsigned CombinedSourceReg,
1543                                        MachineBasicBlock *SourceMBB,
1544                                        SmallVector<unsigned, 2> &PHIIndices,
1545                                        unsigned *ReplaceReg) {
1546   DEBUG(dbgs() << "Shrink PHI: ");
1547   DEBUG(PHI.dump());
1548   DEBUG(dbgs() << " to " << PrintReg(getPHIDestReg(PHI), TRI)
1549                << "<def> = PHI(");
1550 
1551   bool Replaced = false;
1552   unsigned NumInputs = getPHINumInputs(PHI);
1553   int SingleExternalEntryIndex = -1;
1554   for (unsigned i = 0; i < NumInputs; ++i) {
1555     if (!isPHIRegionIndex(PHIIndices, i)) {
1556       if (SingleExternalEntryIndex == -1) {
1557         // Single entry
1558         SingleExternalEntryIndex = i;
1559       } else {
1560         // Multiple entries
1561         SingleExternalEntryIndex = -2;
1562       }
1563     }
1564   }
1565 
1566   if (SingleExternalEntryIndex > -1) {
1567     *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex);
1568     // We should not rewrite the code, we should only pick up the single value
1569     // that represents the shrunk PHI.
1570     Replaced = true;
1571   } else {
1572     MachineBasicBlock *MBB = PHI.getParent();
1573     MachineInstrBuilder MIB =
1574         BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1575                 getPHIDestReg(PHI));
1576     if (SourceMBB) {
1577       MIB.addReg(CombinedSourceReg);
1578       MIB.addMBB(SourceMBB);
1579       DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#"
1580                    << SourceMBB->getNumber());
1581     }
1582 
1583     for (unsigned i = 0; i < NumInputs; ++i) {
1584       if (isPHIRegionIndex(PHIIndices, i)) {
1585         continue;
1586       }
1587       unsigned SourceReg = getPHISourceReg(PHI, i);
1588       MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1589       MIB.addReg(SourceReg);
1590       MIB.addMBB(SourcePred);
1591       DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#"
1592                    << SourcePred->getNumber());
1593     }
1594     DEBUG(dbgs() << ")\n");
1595   }
1596   PHI.eraseFromParent();
1597   return Replaced;
1598 }
1599 
1600 void AMDGPUMachineCFGStructurizer::replacePHI(
1601     MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge,
1602     SmallVector<unsigned, 2> &PHIRegionIndices) {
1603   DEBUG(dbgs() << "Replace PHI: ");
1604   DEBUG(PHI.dump());
1605   DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI)
1606                << "<def> = PHI(");
1607 
1608   bool HasExternalEdge = false;
1609   unsigned NumInputs = getPHINumInputs(PHI);
1610   for (unsigned i = 0; i < NumInputs; ++i) {
1611     if (!isPHIRegionIndex(PHIRegionIndices, i)) {
1612       HasExternalEdge = true;
1613     }
1614   }
1615 
1616   if (HasExternalEdge) {
1617     MachineBasicBlock *MBB = PHI.getParent();
1618     MachineInstrBuilder MIB =
1619         BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1620                 getPHIDestReg(PHI));
1621     MIB.addReg(CombinedSourceReg);
1622     MIB.addMBB(LastMerge);
1623     DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#"
1624                  << LastMerge->getNumber());
1625     for (unsigned i = 0; i < NumInputs; ++i) {
1626       if (isPHIRegionIndex(PHIRegionIndices, i)) {
1627         continue;
1628       }
1629       unsigned SourceReg = getPHISourceReg(PHI, i);
1630       MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1631       MIB.addReg(SourceReg);
1632       MIB.addMBB(SourcePred);
1633       DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#"
1634                    << SourcePred->getNumber());
1635     }
1636     DEBUG(dbgs() << ")\n");
1637   } else {
1638     replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg);
1639   }
1640   PHI.eraseFromParent();
1641 }
1642 
1643 void AMDGPUMachineCFGStructurizer::replaceEntryPHI(
1644     MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB,
1645     SmallVector<unsigned, 2> &PHIRegionIndices) {
1646 
1647   DEBUG(dbgs() << "Replace entry PHI: ");
1648   DEBUG(PHI.dump());
1649   DEBUG(dbgs() << " with ");
1650 
1651   unsigned NumInputs = getPHINumInputs(PHI);
1652   unsigned NumNonRegionInputs = NumInputs;
1653   for (unsigned i = 0; i < NumInputs; ++i) {
1654     if (isPHIRegionIndex(PHIRegionIndices, i)) {
1655       NumNonRegionInputs--;
1656     }
1657   }
1658 
1659   if (NumNonRegionInputs == 0) {
1660     auto DestReg = getPHIDestReg(PHI);
1661     replaceRegisterWith(DestReg, CombinedSourceReg);
1662     DEBUG(dbgs() << " register " << PrintReg(CombinedSourceReg, TRI) << "\n");
1663     PHI.eraseFromParent();
1664   } else {
1665     DEBUG(dbgs() << PrintReg(getPHIDestReg(PHI), TRI) << "<def> = PHI(");
1666     MachineBasicBlock *MBB = PHI.getParent();
1667     MachineInstrBuilder MIB =
1668         BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI),
1669                 getPHIDestReg(PHI));
1670     MIB.addReg(CombinedSourceReg);
1671     MIB.addMBB(IfMBB);
1672     DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#"
1673                  << IfMBB->getNumber());
1674     unsigned NumInputs = getPHINumInputs(PHI);
1675     for (unsigned i = 0; i < NumInputs; ++i) {
1676       if (isPHIRegionIndex(PHIRegionIndices, i)) {
1677         continue;
1678       }
1679       unsigned SourceReg = getPHISourceReg(PHI, i);
1680       MachineBasicBlock *SourcePred = getPHIPred(PHI, i);
1681       MIB.addReg(SourceReg);
1682       MIB.addMBB(SourcePred);
1683       DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#"
1684                    << SourcePred->getNumber());
1685     }
1686     DEBUG(dbgs() << ")\n");
1687     PHI.eraseFromParent();
1688   }
1689 }
1690 
1691 void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs(
1692     MachineInstr &PHI, SmallVector<unsigned, 2> &PHIRegionIndices,
1693     unsigned CombinedSourceReg, LinearizedRegion *LRegion) {
1694   bool WasLiveOut = false;
1695   for (auto PII : PHIRegionIndices) {
1696     unsigned Reg = getPHISourceReg(PHI, PII);
1697     if (LRegion->isLiveOut(Reg)) {
1698       bool IsDead = true;
1699 
1700       // Check if register is live out of the basic block
1701       MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent();
1702       for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) {
1703         if ((*UI).getParent()->getParent() != DefMBB) {
1704           IsDead = false;
1705         }
1706       }
1707 
1708       DEBUG(dbgs() << "Register " << PrintReg(Reg, TRI) << " is "
1709                    << (IsDead ? "dead" : "alive") << " after PHI replace\n");
1710       if (IsDead) {
1711         LRegion->removeLiveOut(Reg);
1712       }
1713       WasLiveOut = true;
1714     }
1715   }
1716 
1717   if (WasLiveOut)
1718     LRegion->addLiveOut(CombinedSourceReg);
1719 }
1720 
1721 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region,
1722                                                   MachineBasicBlock *LastMerge,
1723                                                   MachineInstr &PHI,
1724                                                   LinearizedRegion *LRegion) {
1725   SmallVector<unsigned, 2> PHIRegionIndices;
1726   getPHIRegionIndices(Region, PHI, PHIRegionIndices);
1727   unsigned LinearizedSourceReg =
1728       storePHILinearizationInfo(PHI, &PHIRegionIndices);
1729 
1730   replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices);
1731   replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion);
1732 }
1733 
1734 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region,
1735                                                    MachineBasicBlock *IfMBB,
1736                                                    MachineInstr &PHI) {
1737   SmallVector<unsigned, 2> PHINonRegionIndices;
1738   getPHINonRegionIndices(Region, PHI, PHINonRegionIndices);
1739   unsigned LinearizedSourceReg =
1740       storePHILinearizationInfo(PHI, &PHINonRegionIndices);
1741   replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices);
1742 }
1743 
1744 static void collectPHIs(MachineBasicBlock *MBB,
1745                         SmallVector<MachineInstr *, 2> &PHIs) {
1746   for (auto &BBI : *MBB) {
1747     if (BBI.isPHI()) {
1748       PHIs.push_back(&BBI);
1749     }
1750   }
1751 }
1752 
1753 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region,
1754                                                    MachineBasicBlock *LastMerge,
1755                                                    LinearizedRegion *LRegion) {
1756   SmallVector<MachineInstr *, 2> PHIs;
1757   auto Exit = Region->getSucc();
1758   if (Exit == nullptr)
1759     return;
1760 
1761   collectPHIs(Exit, PHIs);
1762 
1763   for (auto PHII : PHIs) {
1764     rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion);
1765   }
1766 }
1767 
1768 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region,
1769                                                     MachineBasicBlock *IfMBB) {
1770   SmallVector<MachineInstr *, 2> PHIs;
1771   auto Entry = Region->getEntry();
1772 
1773   collectPHIs(Entry, PHIs);
1774 
1775   for (auto PHII : PHIs) {
1776     rewriteRegionEntryPHI(Region, IfMBB, *PHII);
1777   }
1778 }
1779 
1780 void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB,
1781                                                        MachineBasicBlock *Dest,
1782                                                        const DebugLoc &DL) {
1783   DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber()
1784                << " -> " << Dest->getNumber() << "\n");
1785   MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator();
1786   bool HasTerminator = Terminator != MBB->instr_end();
1787   if (HasTerminator) {
1788     TII->ReplaceTailWithBranchTo(Terminator, Dest);
1789   }
1790   if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) {
1791     TII->insertUnconditionalBranch(*MBB, Dest, DL);
1792   }
1793 }
1794 
1795 static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) {
1796   MachineBasicBlock *result = nullptr;
1797   for (auto &MFI : MF) {
1798     if (MFI.succ_size() == 0) {
1799       if (result == nullptr) {
1800         result = &MFI;
1801       } else {
1802         return nullptr;
1803       }
1804     }
1805   }
1806 
1807   return result;
1808 }
1809 
1810 static bool hasOneExitNode(MachineFunction &MF) {
1811   return getSingleExitNode(MF) != nullptr;
1812 }
1813 
1814 MachineBasicBlock *
1815 AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) {
1816   auto Exit = Region->getSucc();
1817 
1818   // If the exit is the end of the function, we just use the existing
1819   MachineFunction *MF = Region->getEntry()->getParent();
1820   if (Exit == nullptr && hasOneExitNode(*MF)) {
1821     return &(*(--(Region->getEntry()->getParent()->end())));
1822   }
1823 
1824   MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock();
1825   if (Exit == nullptr) {
1826     MachineFunction::iterator ExitIter = MF->end();
1827     MF->insert(ExitIter, LastMerge);
1828   } else {
1829     MachineFunction::iterator ExitIter = Exit->getIterator();
1830     MF->insert(ExitIter, LastMerge);
1831     LastMerge->addSuccessor(Exit);
1832     insertUnconditionalBranch(LastMerge, Exit);
1833     DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n");
1834   }
1835   return LastMerge;
1836 }
1837 
1838 void AMDGPUMachineCFGStructurizer::replaceRegisterOutsideMBB(MachineBasicBlock *MBB,
1839                                                        unsigned Register,
1840                                                        unsigned NewRegister) {
1841   assert(Register != NewRegister && "Cannot replace a reg with itself");
1842 
1843   for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
1844                                          E = MRI->reg_end();
1845        I != E;) {
1846     MachineOperand &O = *I;
1847     ++I;
1848     if (O.getParent()->getParent() != MBB) {
1849       if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
1850         llvm_unreachable("Cannot substitute physical registers");
1851         // We don't handle physical registers, but if we need to in the future
1852         // This is how we do it: O.substPhysReg(NewRegister, *TRI);
1853       } else {
1854         O.setReg(NewRegister);
1855       }
1856     }
1857   }
1858 }
1859 
1860 void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB,
1861                                             MachineBasicBlock *CodeBB,
1862                                             MachineBasicBlock *MergeBB,
1863                                             unsigned DestRegister,
1864                                             unsigned IfSourceRegister,
1865                                             unsigned CodeSourceRegister,
1866                                             bool IsUndefIfSource) {
1867   // If this is the function exit block, we don't need a phi.
1868   if (MergeBB->succ_begin() == MergeBB->succ_end()) {
1869     return;
1870   }
1871   DEBUG(dbgs() << "Merge PHI (BB#" << MergeBB->getNumber()
1872                << "): " << PrintReg(DestRegister, TRI) << "<def> = PHI("
1873                << PrintReg(IfSourceRegister, TRI) << ", BB#"
1874                << IfBB->getNumber() << PrintReg(CodeSourceRegister, TRI)
1875                << ", BB#" << CodeBB->getNumber() << ")\n");
1876   const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin());
1877   MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL,
1878                                     TII->get(TargetOpcode::PHI), DestRegister);
1879   if (IsUndefIfSource && false) {
1880     MIB.addReg(IfSourceRegister, RegState::Undef);
1881   } else {
1882     MIB.addReg(IfSourceRegister);
1883   }
1884   MIB.addMBB(IfBB);
1885   MIB.addReg(CodeSourceRegister);
1886   MIB.addMBB(CodeBB);
1887 }
1888 
1889 static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) {
1890   for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(),
1891                                         E = MBB->succ_end();
1892        PI != E; ++PI) {
1893     if ((*PI) != MBB) {
1894       (MBB)->removeSuccessor(*PI);
1895     }
1896   }
1897 }
1898 
1899 static void removeExternalCFGEdges(MachineBasicBlock *StartMBB,
1900                                    MachineBasicBlock *EndMBB) {
1901 
1902   // We have to check against the StartMBB successor becasuse a
1903   // structurized region with a loop will have the entry block split,
1904   // and the backedge will go to the entry successor.
1905   DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs;
1906   unsigned SuccSize = StartMBB->succ_size();
1907   if (SuccSize > 0) {
1908     MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin());
1909     for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(),
1910                                           E = EndMBB->succ_end();
1911          PI != E; ++PI) {
1912       // Either we have a back-edge to the entry block, or a back-edge to the
1913       // succesor of the entry block since the block may be split.
1914       if ((*PI) != StartMBB &&
1915           !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) {
1916         Succs.insert(
1917             std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI));
1918       }
1919     }
1920   }
1921 
1922   for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(),
1923                                         E = StartMBB->pred_end();
1924        PI != E; ++PI) {
1925     if ((*PI) != EndMBB) {
1926       Succs.insert(
1927           std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB));
1928     }
1929   }
1930 
1931   for (auto SI : Succs) {
1932     std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI;
1933     DEBUG(dbgs() << "Removing edge: BB#" << Edge.first->getNumber() << " -> BB#"
1934                  << Edge.second->getNumber() << "\n");
1935     Edge.first->removeSuccessor(Edge.second);
1936   }
1937 }
1938 
1939 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock(
1940     MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart,
1941     MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg,
1942     bool InheritPreds) {
1943   MachineFunction *MF = MergeBB->getParent();
1944   MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock();
1945 
1946   if (InheritPreds) {
1947     for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(),
1948                                           E = CodeBBStart->pred_end();
1949          PI != E; ++PI) {
1950       if ((*PI) != CodeBBEnd) {
1951         MachineBasicBlock *Pred = (*PI);
1952         Pred->addSuccessor(IfBB);
1953       }
1954     }
1955   }
1956 
1957   removeExternalCFGEdges(CodeBBStart, CodeBBEnd);
1958 
1959   auto CodeBBStartI = CodeBBStart->getIterator();
1960   auto CodeBBEndI = CodeBBEnd->getIterator();
1961   auto MergeIter = MergeBB->getIterator();
1962   MF->insert(MergeIter, IfBB);
1963   MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI);
1964   IfBB->addSuccessor(MergeBB);
1965   IfBB->addSuccessor(CodeBBStart);
1966 
1967   DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n");
1968   // Ensure that the MergeBB is a succesor of the CodeEndBB.
1969   if (!CodeBBEnd->isSuccessor(MergeBB))
1970     CodeBBEnd->addSuccessor(MergeBB);
1971 
1972   DEBUG(dbgs() << "Moved MBB#" << CodeBBStart->getNumber() << " through MBB#"
1973                << CodeBBEnd->getNumber() << "\n");
1974 
1975   // If we have a single predecessor we can find a reasonable debug location
1976   MachineBasicBlock *SinglePred =
1977       CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr;
1978   const DebugLoc &DL = SinglePred
1979                     ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator())
1980                     : DebugLoc();
1981 
1982   unsigned Reg =
1983       TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg,
1984                     SelectBB->getNumber() /* CodeBBStart->getNumber() */);
1985   if (&(*(IfBB->getParent()->begin())) == IfBB) {
1986     TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg,
1987 			      CodeBBStart->getNumber());
1988   }
1989   MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
1990   ArrayRef<MachineOperand> Cond(RegOp);
1991   TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL);
1992 
1993   return IfBB;
1994 }
1995 
1996 void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled(
1997     SmallVector<MachineOperand, 1> Cond) {
1998   if (Cond.size() != 1)
1999     return;
2000   if (!Cond[0].isReg())
2001     return;
2002 
2003   unsigned CondReg = Cond[0].getReg();
2004   for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) {
2005     (*UI).setIsKill(false);
2006   }
2007 }
2008 
2009 void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB,
2010                                                      MachineBasicBlock *MergeBB,
2011                                                      unsigned BBSelectReg) {
2012   MachineBasicBlock *TrueBB = nullptr;
2013   MachineBasicBlock *FalseBB = nullptr;
2014   SmallVector<MachineOperand, 1> Cond;
2015   MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB];
2016   TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond);
2017 
2018   const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator());
2019 
2020   if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) {
2021     // This is an exit block, hence no successors. We will assign the
2022     // bb select register to the entry block.
2023     TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
2024 			      BBSelectReg,
2025 			      CodeBB->getParent()->begin()->getNumber());
2026     insertUnconditionalBranch(CodeBB, MergeBB, DL);
2027     return;
2028   }
2029 
2030   if (FalseBB == nullptr && TrueBB == nullptr) {
2031     TrueBB = FallthroughBB;
2032   } else if (TrueBB != nullptr) {
2033     FalseBB =
2034         (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB;
2035   }
2036 
2037   if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) {
2038     TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
2039 			      BBSelectReg, TrueBB->getNumber());
2040   } else {
2041     const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg);
2042     unsigned TrueBBReg = MRI->createVirtualRegister(RegClass);
2043     unsigned FalseBBReg = MRI->createVirtualRegister(RegClass);
2044     TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
2045 			      TrueBBReg, TrueBB->getNumber());
2046     TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL,
2047 			      FalseBBReg, FalseBB->getNumber());
2048     ensureCondIsNotKilled(Cond);
2049     TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL,
2050                             BBSelectReg, Cond, TrueBBReg, FalseBBReg);
2051   }
2052 
2053   insertUnconditionalBranch(CodeBB, MergeBB, DL);
2054 }
2055 
2056 MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) {
2057   if (MRI->def_begin(Reg) == MRI->def_end()) {
2058     DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo())
2059                  << " has NO defs\n");
2060   } else if (!MRI->hasOneDef(Reg)) {
2061     DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo())
2062                  << " has multiple defs\n");
2063     DEBUG(dbgs() << "DEFS BEGIN:\n");
2064     for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) {
2065       DEBUG(DI->getParent()->dump());
2066     }
2067     DEBUG(dbgs() << "DEFS END\n");
2068   }
2069 
2070   assert(MRI->hasOneDef(Reg) && "Register has multiple definitions");
2071   return (*(MRI->def_begin(Reg))).getParent();
2072 }
2073 
2074 void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB,
2075                                               MachineBasicBlock *CodeBB,
2076                                               MachineBasicBlock *MergeBB,
2077                                               LinearizedRegion *InnerRegion,
2078                                               unsigned DestReg,
2079                                               unsigned SourceReg) {
2080   // In this function we know we are part of a chain already, so we need
2081   // to add the registers to the existing chain, and rename the register
2082   // inside the region.
2083   bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
2084   MachineInstr *DefInstr = getDefInstr(SourceReg);
2085   if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) {
2086     // Handle the case where the def is a PHI-def inside a basic
2087     // block, then we only need to do renaming. Special care needs to
2088     // be taken if the PHI-def is part of an existing chain, or if a
2089     // new one needs to be created.
2090     InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI);
2091 
2092     // We collect all PHI Information, and if we are at the region entry,
2093     // all PHIs will be removed, and then re-introduced if needed.
2094     storePHILinearizationInfoDest(DestReg, *DefInstr);
2095     // We have picked up all the information we need now and can remove
2096     // the PHI
2097     PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
2098     DefInstr->eraseFromParent();
2099   } else {
2100     // If this is not a phi-def, or it is a phi-def but from a linearized region
2101     if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) {
2102       // If this is a single BB and the definition is in this block we
2103       // need to replace any uses outside the region.
2104       InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI);
2105     }
2106     const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg);
2107     unsigned NextDestReg = MRI->createVirtualRegister(RegClass);
2108     bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1;
2109     DEBUG(dbgs() << "Insert Chained PHI\n");
2110     insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg,
2111                    SourceReg, IsLastDef);
2112 
2113     PHIInfo.removeSource(DestReg, SourceReg, CodeBB);
2114     if (IsLastDef) {
2115       const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator());
2116       TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL,
2117 				NextDestReg, 0);
2118       PHIInfo.deleteDef(DestReg);
2119     } else {
2120       PHIInfo.replaceDef(DestReg, NextDestReg);
2121     }
2122   }
2123 }
2124 
2125 bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB,
2126                                          LinearizedRegion *InnerRegion,
2127                                          unsigned Register) {
2128   return getDefInstr(Register)->getParent() == MBB ||
2129          InnerRegion->contains(getDefInstr(Register)->getParent());
2130 }
2131 
2132 void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB,
2133                                                 MachineBasicBlock *CodeBB,
2134                                                 MachineBasicBlock *MergeBB,
2135                                                 LinearizedRegion *InnerRegion,
2136                                                 LinearizedRegion *LRegion) {
2137   DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts();
2138   SmallVector<unsigned, 4> OldLiveOuts;
2139   bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit();
2140   for (auto OLI : *LiveOuts) {
2141     OldLiveOuts.push_back(OLI);
2142   }
2143 
2144   for (auto LI : OldLiveOuts) {
2145     DEBUG(dbgs() << "LiveOut: " << PrintReg(LI, TRI));
2146     if (!containsDef(CodeBB, InnerRegion, LI) ||
2147         (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) {
2148       // If the register simly lives through the CodeBB, we don't have
2149       // to rewrite anything since the register is not defined in this
2150       // part of the code.
2151       DEBUG(dbgs() << "- through");
2152       continue;
2153     }
2154     DEBUG(dbgs() << "\n");
2155     unsigned Reg = LI;
2156     if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) {
2157       // If the register is live out, we do want to create a phi,
2158       // unless it is from the Exit block, becasuse in that case there
2159       // is already a PHI, and no need to create a new one.
2160 
2161       // If the register is just a live out def and not part of a phi
2162       // chain, we need to create a PHI node to handle the if region,
2163       // and replace all uses outside of the region with the new dest
2164       // register, unless it is the outgoing BB select register. We have
2165       // already creaed phi nodes for these.
2166       const TargetRegisterClass *RegClass = MRI->getRegClass(Reg);
2167       unsigned PHIDestReg = MRI->createVirtualRegister(RegClass);
2168       unsigned IfSourceReg = MRI->createVirtualRegister(RegClass);
2169       // Create initializer, this value is never used, but is needed
2170       // to satisfy SSA.
2171       DEBUG(dbgs() << "Initializer for reg: " << PrintReg(Reg) << "\n");
2172       TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(),
2173                         IfSourceReg, 0);
2174 
2175       InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI);
2176       DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n");
2177       insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg,
2178                      IfSourceReg, Reg, true);
2179     }
2180   }
2181 
2182   // Handle the chained definitions in PHIInfo, checking if this basic block
2183   // is a source block for a definition.
2184   SmallVector<unsigned, 4> Sources;
2185   if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) {
2186     DEBUG(dbgs() << "Inserting PHI Live Out from BB#" << CodeBB->getNumber()
2187                  << "\n");
2188     for (auto SI : Sources) {
2189       unsigned DestReg;
2190       PHIInfo.findDest(SI, CodeBB, DestReg);
2191       insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI);
2192     }
2193     DEBUG(dbgs() << "Insertion done.\n");
2194   }
2195 
2196   PHIInfo.dump(MRI);
2197 }
2198 
2199 void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) {
2200   DEBUG(dbgs() << "Before PHI Prune\n");
2201   DEBUG(PHIInfo.dump(MRI));
2202   SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4>
2203       ElimiatedSources;
2204   for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2205        ++DRI) {
2206 
2207     unsigned DestReg = *DRI;
2208     auto SE = PHIInfo.sources_end(DestReg);
2209 
2210     bool MBBContainsPHISource = false;
2211     // Check if there is a PHI source in this MBB
2212     for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2213       unsigned SourceReg = (*SRI).first;
2214       MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
2215       if (Def->getParent()->getParent() == MBB) {
2216         MBBContainsPHISource = true;
2217       }
2218     }
2219 
2220     // If so, all other sources are useless since we know this block
2221     // is always executed when the region is executed.
2222     if (MBBContainsPHISource) {
2223       for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2224         PHILinearize::PHISourceT Source = *SRI;
2225         unsigned SourceReg = Source.first;
2226         MachineBasicBlock *SourceMBB = Source.second;
2227         MachineOperand *Def = &(*(MRI->def_begin(SourceReg)));
2228         if (Def->getParent()->getParent() != MBB) {
2229           ElimiatedSources.push_back(
2230               std::make_tuple(DestReg, SourceReg, SourceMBB));
2231         }
2232       }
2233     }
2234   }
2235 
2236   // Remove the PHI sources that are in the given MBB
2237   for (auto &SourceInfo : ElimiatedSources) {
2238     PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo),
2239                          std::get<2>(SourceInfo));
2240   }
2241   DEBUG(dbgs() << "After PHI Prune\n");
2242   DEBUG(PHIInfo.dump(MRI));
2243 }
2244 
2245 void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion,
2246                                             unsigned DestReg) {
2247   MachineBasicBlock *Entry = CurrentRegion->getEntry();
2248   MachineBasicBlock *Exit = CurrentRegion->getExit();
2249   MachineBasicBlock *Pred = *(Entry->pred_begin());
2250 
2251   DEBUG(dbgs() << "RegionExit: " << Exit->getNumber()
2252                << " Pred: " << Pred->getNumber() << "\n");
2253 
2254   int NumSources = 0;
2255   auto SE = PHIInfo.sources_end(DestReg);
2256 
2257   for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2258     NumSources++;
2259   }
2260 
2261   if (NumSources == 1) {
2262     auto SRI = PHIInfo.sources_begin(DestReg);
2263     unsigned SourceReg = (*SRI).first;
2264     replaceRegisterWith(DestReg, SourceReg);
2265   } else {
2266     const DebugLoc &DL = Entry->findDebugLoc(Entry->begin());
2267     MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL,
2268                                       TII->get(TargetOpcode::PHI), DestReg);
2269     DEBUG(dbgs() << "Entry PHI " << PrintReg(DestReg, TRI) << "<def> = PHI(");
2270 
2271     unsigned CurrentBackedgeReg = 0;
2272 
2273     for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) {
2274       unsigned SourceReg = (*SRI).first;
2275 
2276       if (CurrentRegion->contains((*SRI).second)) {
2277         if (CurrentBackedgeReg == 0) {
2278           CurrentBackedgeReg = SourceReg;
2279         } else {
2280           MachineInstr *PHIDefInstr = getDefInstr(SourceReg);
2281           MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent();
2282           const TargetRegisterClass *RegClass =
2283               MRI->getRegClass(CurrentBackedgeReg);
2284           unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass);
2285           MachineInstrBuilder BackedgePHI =
2286               BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL,
2287                       TII->get(TargetOpcode::PHI), NewBackedgeReg);
2288           BackedgePHI.addReg(CurrentBackedgeReg);
2289           BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0));
2290           BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1));
2291           BackedgePHI.addMBB((*SRI).second);
2292           CurrentBackedgeReg = NewBackedgeReg;
2293           DEBUG(dbgs() << "Inserting backedge PHI: "
2294                        << PrintReg(NewBackedgeReg, TRI) << "<def> = PHI("
2295                        << PrintReg(CurrentBackedgeReg, TRI) << ", BB#"
2296                        << getPHIPred(*PHIDefInstr, 0)->getNumber() << ", "
2297                        << PrintReg(getPHISourceReg(*PHIDefInstr, 1), TRI)
2298                        << ", BB#" << (*SRI).second->getNumber());
2299         }
2300       } else {
2301         MIB.addReg(SourceReg);
2302         MIB.addMBB((*SRI).second);
2303         DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#"
2304                      << (*SRI).second->getNumber() << ", ");
2305       }
2306     }
2307 
2308     // Add the final backedge register source to the entry phi
2309     if (CurrentBackedgeReg != 0) {
2310       MIB.addReg(CurrentBackedgeReg);
2311       MIB.addMBB(Exit);
2312       DEBUG(dbgs() << PrintReg(CurrentBackedgeReg, TRI) << ", BB#"
2313                    << Exit->getNumber() << ")\n");
2314     } else {
2315       DEBUG(dbgs() << ")\n");
2316     }
2317   }
2318 }
2319 
2320 void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) {
2321   DEBUG(PHIInfo.dump(MRI));
2322 
2323   for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2324        ++DRI) {
2325 
2326     unsigned DestReg = *DRI;
2327     createEntryPHI(CurrentRegion, DestReg);
2328   }
2329   PHIInfo.clear();
2330 }
2331 
2332 void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register,
2333                                                  unsigned NewRegister) {
2334   assert(Register != NewRegister && "Cannot replace a reg with itself");
2335 
2336   for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register),
2337                                          E = MRI->reg_end();
2338        I != E;) {
2339     MachineOperand &O = *I;
2340     ++I;
2341     if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) {
2342       DEBUG(dbgs() << "Trying to substitute physical register: "
2343                    << PrintReg(NewRegister, MRI->getTargetRegisterInfo())
2344                    << "\n");
2345       llvm_unreachable("Cannot substitute physical registers");
2346       // We don't handle physical registers, but if we need to
2347       // in the future This is how we do it:
2348       // O.substPhysReg(NewRegister, *TRI);
2349     } else {
2350       DEBUG(dbgs() << "Replacing register: "
2351                    << PrintReg(Register, MRI->getTargetRegisterInfo())
2352                    << " with "
2353                    << PrintReg(NewRegister, MRI->getTargetRegisterInfo())
2354                    << "\n");
2355       O.setReg(NewRegister);
2356     }
2357   }
2358   PHIInfo.deleteDef(Register);
2359 
2360   getRegionMRT()->replaceLiveOutReg(Register, NewRegister);
2361 
2362   DEBUG(PHIInfo.dump(MRI));
2363 }
2364 
2365 void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) {
2366   DEBUG(dbgs() << "Resolve PHI Infos\n");
2367   DEBUG(PHIInfo.dump(MRI));
2368   for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE;
2369        ++DRI) {
2370     unsigned DestReg = *DRI;
2371     DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) << "\n");
2372     auto SRI = PHIInfo.sources_begin(DestReg);
2373     unsigned SourceReg = (*SRI).first;
2374     DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI)
2375                  << " SourceReg: " << PrintReg(SourceReg, TRI) << "\n");
2376 
2377     assert(PHIInfo.sources_end(DestReg) == ++SRI &&
2378            "More than one phi source in entry node");
2379     replaceRegisterWith(DestReg, SourceReg);
2380   }
2381 }
2382 
2383 static bool isFunctionEntryBlock(MachineBasicBlock *MBB) {
2384   return ((&(*(MBB->getParent()->begin()))) == MBB);
2385 }
2386 
2387 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
2388     MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB,
2389     LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn,
2390     unsigned BBSelectRegOut) {
2391   if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) {
2392     // Handle non-loop function entry block.
2393     // We need to allow loops to the entry block and then
2394     rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
2395     resolvePHIInfos(CodeBB);
2396     removeExternalCFGSuccessors(CodeBB);
2397     CodeBB->addSuccessor(MergeBB);
2398     CurrentRegion->addMBB(CodeBB);
2399     return nullptr;
2400   }
2401   if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) {
2402     // Handle non-loop region entry block.
2403     MachineFunction *MF = MergeBB->getParent();
2404     auto MergeIter = MergeBB->getIterator();
2405     auto CodeBBStartIter = CodeBB->getIterator();
2406     auto CodeBBEndIter = ++(CodeBB->getIterator());
2407     if (CodeBBEndIter != MergeIter) {
2408       MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter);
2409     }
2410     rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut);
2411     prunePHIInfo(CodeBB);
2412     createEntryPHIs(CurrentRegion);
2413     removeExternalCFGSuccessors(CodeBB);
2414     CodeBB->addSuccessor(MergeBB);
2415     CurrentRegion->addMBB(CodeBB);
2416     return nullptr;
2417   } else {
2418     // Handle internal block.
2419     const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn);
2420     unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass);
2421     rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg);
2422     bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB;
2423     MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB,
2424                                             BBSelectRegIn, IsRegionEntryBB);
2425     CurrentRegion->addMBB(IfBB);
2426     // If this is the entry block we need to make the If block the new
2427     // linearized region entry.
2428     if (IsRegionEntryBB) {
2429       CurrentRegion->setEntry(IfBB);
2430 
2431       if (CurrentRegion->getHasLoop()) {
2432         MachineBasicBlock *RegionExit = CurrentRegion->getExit();
2433         MachineBasicBlock *ETrueBB = nullptr;
2434         MachineBasicBlock *EFalseBB = nullptr;
2435         SmallVector<MachineOperand, 1> ECond;
2436 
2437         const DebugLoc &DL = DebugLoc();
2438         TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
2439         TII->removeBranch(*RegionExit);
2440 
2441         // We need to create a backedge if there is a loop
2442         unsigned Reg = TII->insertNE(
2443             RegionExit, RegionExit->instr_end(), DL,
2444             CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
2445             CurrentRegion->getRegionMRT()->getEntry()->getNumber());
2446         MachineOperand RegOp =
2447             MachineOperand::CreateReg(Reg, false, false, true);
2448         ArrayRef<MachineOperand> Cond(RegOp);
2449         DEBUG(dbgs() << "RegionExitReg: ");
2450         DEBUG(Cond[0].print(dbgs(), TRI));
2451         DEBUG(dbgs() << "\n");
2452         TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
2453                           Cond, DebugLoc());
2454         RegionExit->addSuccessor(CurrentRegion->getEntry());
2455       }
2456     }
2457     CurrentRegion->addMBB(CodeBB);
2458     LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo);
2459 
2460     InnerRegion.setParent(CurrentRegion);
2461     DEBUG(dbgs() << "Insert BB Select PHI (BB)\n");
2462     insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
2463                    CodeBBSelectReg);
2464     InnerRegion.addMBB(MergeBB);
2465 
2466     DEBUG(InnerRegion.print(dbgs(), TRI));
2467     rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion);
2468     extractKilledPHIs(CodeBB);
2469     if (IsRegionEntryBB) {
2470       createEntryPHIs(CurrentRegion);
2471     }
2472     return IfBB;
2473   }
2474 }
2475 
2476 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion(
2477     MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion,
2478     LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB,
2479     unsigned BBSelectRegIn, unsigned BBSelectRegOut) {
2480   unsigned CodeBBSelectReg =
2481       InnerRegion->getRegionMRT()->getInnerOutputRegister();
2482   MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry();
2483   MachineBasicBlock *CodeExitBB = InnerRegion->getExit();
2484   MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB,
2485                                           SelectBB, BBSelectRegIn, true);
2486   CurrentRegion->addMBB(IfBB);
2487   bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry();
2488   if (isEntry) {
2489 
2490     if (CurrentRegion->getHasLoop()) {
2491       MachineBasicBlock *RegionExit = CurrentRegion->getExit();
2492       MachineBasicBlock *ETrueBB = nullptr;
2493       MachineBasicBlock *EFalseBB = nullptr;
2494       SmallVector<MachineOperand, 1> ECond;
2495 
2496       const DebugLoc &DL = DebugLoc();
2497       TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond);
2498       TII->removeBranch(*RegionExit);
2499 
2500       // We need to create a backedge if there is a loop
2501       unsigned Reg =
2502           TII->insertNE(RegionExit, RegionExit->instr_end(), DL,
2503                         CurrentRegion->getRegionMRT()->getInnerOutputRegister(),
2504                         CurrentRegion->getRegionMRT()->getEntry()->getNumber());
2505       MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true);
2506       ArrayRef<MachineOperand> Cond(RegOp);
2507       DEBUG(dbgs() << "RegionExitReg: ");
2508       DEBUG(Cond[0].print(dbgs(), TRI));
2509       DEBUG(dbgs() << "\n");
2510       TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit,
2511                         Cond, DebugLoc());
2512       RegionExit->addSuccessor(IfBB);
2513     }
2514   }
2515   CurrentRegion->addMBBs(InnerRegion);
2516   DEBUG(dbgs() << "Insert BB Select PHI (region)\n");
2517   insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn,
2518                  CodeBBSelectReg);
2519 
2520   rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion,
2521                      CurrentRegion);
2522 
2523   rewriteRegionEntryPHIs(InnerRegion, IfBB);
2524 
2525   if (isEntry) {
2526     CurrentRegion->setEntry(IfBB);
2527   }
2528 
2529   if (isEntry) {
2530     createEntryPHIs(CurrentRegion);
2531   }
2532 
2533   return IfBB;
2534 }
2535 
2536 void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI,
2537                                           MachineBasicBlock *Entry,
2538                                           MachineBasicBlock *EntrySucc,
2539                                           LinearizedRegion *LRegion) {
2540   SmallVector<unsigned, 2> PHIRegionIndices;
2541   getPHIRegionIndices(LRegion, PHI, PHIRegionIndices);
2542 
2543   assert(PHIRegionIndices.size() == 1);
2544 
2545   unsigned RegionIndex = PHIRegionIndices[0];
2546   unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex);
2547   MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex);
2548   unsigned PHIDest = getPHIDestReg(PHI);
2549   unsigned PHISource = PHIDest;
2550   unsigned ReplaceReg;
2551 
2552   if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) {
2553     PHISource = ReplaceReg;
2554   }
2555 
2556   const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest);
2557   unsigned NewDestReg = MRI->createVirtualRegister(RegClass);
2558   LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI);
2559   MachineInstrBuilder MIB =
2560       BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(),
2561               TII->get(TargetOpcode::PHI), NewDestReg);
2562   DEBUG(dbgs() << "Split Entry PHI " << PrintReg(NewDestReg, TRI)
2563                << "<def> = PHI(");
2564   MIB.addReg(PHISource);
2565   MIB.addMBB(Entry);
2566   DEBUG(dbgs() << PrintReg(PHISource, TRI) << ", BB#" << Entry->getNumber());
2567   MIB.addReg(RegionSourceReg);
2568   MIB.addMBB(RegionSourceMBB);
2569   DEBUG(dbgs() << " ," << PrintReg(RegionSourceReg, TRI) << ", BB#"
2570                << RegionSourceMBB->getNumber() << ")\n");
2571 }
2572 
2573 void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry,
2574                                            MachineBasicBlock *EntrySucc,
2575                                            LinearizedRegion *LRegion) {
2576   SmallVector<MachineInstr *, 2> PHIs;
2577   collectPHIs(Entry, PHIs);
2578 
2579   for (auto PHII : PHIs) {
2580     splitLoopPHI(*PHII, Entry, EntrySucc, LRegion);
2581   }
2582 }
2583 
2584 // Split the exit block so that we can insert a end control flow
2585 MachineBasicBlock *
2586 AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) {
2587   auto MRTRegion = LRegion->getRegionMRT();
2588   auto Exit = LRegion->getExit();
2589   auto MF = Exit->getParent();
2590   auto Succ = MRTRegion->getSucc();
2591 
2592   auto NewExit = MF->CreateMachineBasicBlock();
2593   auto AfterExitIter = Exit->getIterator();
2594   AfterExitIter++;
2595   MF->insert(AfterExitIter, NewExit);
2596   Exit->removeSuccessor(Succ);
2597   Exit->addSuccessor(NewExit);
2598   NewExit->addSuccessor(Succ);
2599   insertUnconditionalBranch(NewExit, Succ);
2600   LRegion->addMBB(NewExit);
2601   LRegion->setExit(NewExit);
2602 
2603   DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() << "\n");
2604 
2605   // Replace any PHI Predecessors in the successor with NewExit
2606   for (auto &II : *Succ) {
2607     MachineInstr &Instr = II;
2608 
2609     // If we are past the PHI instructions we are done
2610     if (!Instr.isPHI())
2611       break;
2612 
2613     int numPreds = getPHINumInputs(Instr);
2614     for (int i = 0; i < numPreds; ++i) {
2615       auto Pred = getPHIPred(Instr, i);
2616       if (Pred == Exit) {
2617         setPhiPred(Instr, i, NewExit);
2618       }
2619     }
2620   }
2621 
2622   return NewExit;
2623 }
2624 
2625 
2626 static MachineBasicBlock *split(MachineBasicBlock::iterator I) {
2627   // Create the fall-through block.
2628   MachineBasicBlock *MBB = (*I).getParent();
2629   MachineFunction *MF = MBB->getParent();
2630   MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock();
2631   auto MBBIter = ++(MBB->getIterator());
2632   MF->insert(MBBIter, SuccMBB);
2633   SuccMBB->transferSuccessorsAndUpdatePHIs(MBB);
2634   MBB->addSuccessor(SuccMBB);
2635 
2636   // Splice the code over.
2637   SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end());
2638 
2639   return SuccMBB;
2640 }
2641 
2642 // Split the entry block separating PHI-nodes and the rest of the code
2643 // This is needed to insert an initializer for the bb select register
2644 // inloop regions.
2645 
2646 MachineBasicBlock *
2647 AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) {
2648   MachineBasicBlock *Entry = LRegion->getEntry();
2649   MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI());
2650   MachineBasicBlock *Exit = LRegion->getExit();
2651 
2652   DEBUG(dbgs() << "Split BB#" << Entry->getNumber() << " to BB#"
2653                << Entry->getNumber() << " -> BB#" << EntrySucc->getNumber()
2654                << "\n");
2655   LRegion->addMBB(EntrySucc);
2656 
2657   // Make the backedge go to Entry Succ
2658   if (Exit->isSuccessor(Entry)) {
2659     Exit->removeSuccessor(Entry);
2660   }
2661   Exit->addSuccessor(EntrySucc);
2662   MachineInstr &Branch = *(Exit->instr_rbegin());
2663   for (auto &UI : Branch.uses()) {
2664     if (UI.isMBB() && UI.getMBB() == Entry) {
2665       UI.setMBB(EntrySucc);
2666     }
2667   }
2668 
2669   splitLoopPHIs(Entry, EntrySucc, LRegion);
2670 
2671   return EntrySucc;
2672 }
2673 
2674 LinearizedRegion *
2675 AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) {
2676   LinearizedRegion *LRegion = Region->getLinearizedRegion();
2677   LRegion->initLiveOut(Region, MRI, TRI, PHIInfo);
2678   LRegion->setEntry(Region->getEntry());
2679   return LRegion;
2680 }
2681 
2682 static void removeOldExitPreds(RegionMRT *Region) {
2683   MachineBasicBlock *Exit = Region->getSucc();
2684   if (Exit == nullptr) {
2685     return;
2686   }
2687   for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(),
2688                                         E = Exit->pred_end();
2689        PI != E; ++PI) {
2690     if (Region->contains(*PI)) {
2691       (*PI)->removeSuccessor(Exit);
2692     }
2693   }
2694 }
2695 
2696 static bool mbbHasBackEdge(MachineBasicBlock *MBB,
2697                            SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
2698   for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) {
2699     if (MBBs.count(*SI) != 0) {
2700       return true;
2701     }
2702   }
2703   return false;
2704 }
2705 
2706 static bool containsNewBackedge(MRT *Tree,
2707                                 SmallPtrSet<MachineBasicBlock *, 8> &MBBs) {
2708   // Need to traverse this in reverse since it is in post order.
2709   if (Tree == nullptr)
2710     return false;
2711 
2712   if (Tree->isMBB()) {
2713     MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB();
2714     MBBs.insert(MBB);
2715     if (mbbHasBackEdge(MBB, MBBs)) {
2716       return true;
2717     }
2718   } else {
2719     RegionMRT *Region = Tree->getRegionMRT();
2720     SetVector<MRT *> *Children = Region->getChildren();
2721     for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) {
2722       if (containsNewBackedge(*CI, MBBs))
2723         return true;
2724     }
2725   }
2726   return false;
2727 }
2728 
2729 static bool containsNewBackedge(RegionMRT *Region) {
2730   SmallPtrSet<MachineBasicBlock *, 8> MBBs;
2731   return containsNewBackedge(Region, MBBs);
2732 }
2733 
2734 bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) {
2735   auto *LRegion = initLinearizedRegion(Region);
2736   LRegion->setHasLoop(containsNewBackedge(Region));
2737   MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region);
2738   MachineBasicBlock *CurrentMerge = LastMerge;
2739   LRegion->addMBB(LastMerge);
2740   LRegion->setExit(LastMerge);
2741 
2742   rewriteRegionExitPHIs(Region, LastMerge, LRegion);
2743   removeOldExitPreds(Region);
2744 
2745   DEBUG(PHIInfo.dump(MRI));
2746 
2747   auto Entry = Region->getEntry();
2748 
2749   SetVector<MRT *> *Children = Region->getChildren();
2750   DEBUG(dbgs() << "===========If Region Start===============\n");
2751   if (LRegion->getHasLoop()) {
2752     DEBUG(dbgs() << "Has Backedge: Yes\n");
2753   } else {
2754     DEBUG(dbgs() << "Has Backedge: No\n");
2755   }
2756 
2757   unsigned BBSelectRegIn;
2758   unsigned BBSelectRegOut;
2759   for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) {
2760     DEBUG(dbgs() << "CurrentRegion: \n");
2761     DEBUG(LRegion->print(dbgs(), TRI));
2762 
2763     auto CNI = CI;
2764     ++CNI;
2765 
2766     MRT *Child = (*CI);
2767 
2768     if (Child->isRegion()) {
2769 
2770       LinearizedRegion *InnerLRegion =
2771           Child->getRegionMRT()->getLinearizedRegion();
2772       // We found the block is the exit of an inner region, we need
2773       // to put it in the current linearized region.
2774 
2775       DEBUG(dbgs() << "Linearizing region: ");
2776       DEBUG(InnerLRegion->print(dbgs(), TRI));
2777       DEBUG(dbgs() << "\n");
2778 
2779       MachineBasicBlock *InnerEntry = InnerLRegion->getEntry();
2780       if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) {
2781         // Entry has already been linearized, no need to do this region.
2782         unsigned OuterSelect = InnerLRegion->getBBSelectRegOut();
2783         unsigned InnerSelectReg =
2784             InnerLRegion->getRegionMRT()->getInnerOutputRegister();
2785         replaceRegisterWith(InnerSelectReg, OuterSelect),
2786             resolvePHIInfos(InnerEntry);
2787         if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge))
2788           InnerLRegion->getExit()->addSuccessor(CurrentMerge);
2789         continue;
2790       }
2791 
2792       BBSelectRegOut = Child->getBBSelectRegOut();
2793       BBSelectRegIn = Child->getBBSelectRegIn();
2794 
2795       DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI)
2796                    << "\n");
2797       DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI)
2798                    << "\n");
2799 
2800       MachineBasicBlock *IfEnd = CurrentMerge;
2801       CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion,
2802                                     Child->getRegionMRT()->getEntry(),
2803                                     BBSelectRegIn, BBSelectRegOut);
2804       TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
2805     } else {
2806       MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB();
2807       DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n");
2808 
2809       if (MBB == getSingleExitNode(*(MBB->getParent()))) {
2810         // If this is the exit block then we need to skip to the next.
2811         // The "in" register will be transferred to "out" in the next
2812         // iteration.
2813         continue;
2814       }
2815 
2816       BBSelectRegOut = Child->getBBSelectRegOut();
2817       BBSelectRegIn = Child->getBBSelectRegIn();
2818 
2819       DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI)
2820                    << "\n");
2821       DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI)
2822                    << "\n");
2823 
2824       MachineBasicBlock *IfEnd = CurrentMerge;
2825       // This is a basic block that is not part of an inner region, we
2826       // need to put it in the current linearized region.
2827       CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn,
2828                                     BBSelectRegOut);
2829       if (CurrentMerge) {
2830         TII->convertNonUniformIfRegion(CurrentMerge, IfEnd);
2831       }
2832 
2833       DEBUG(PHIInfo.dump(MRI));
2834     }
2835   }
2836 
2837   LRegion->removeFalseRegisterKills(MRI);
2838 
2839   if (LRegion->getHasLoop()) {
2840     MachineBasicBlock *NewSucc = splitEntry(LRegion);
2841     if (isFunctionEntryBlock(LRegion->getEntry())) {
2842       resolvePHIInfos(LRegion->getEntry());
2843     }
2844     const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI());
2845     unsigned InReg = LRegion->getBBSelectRegIn();
2846     unsigned InnerSelectReg =
2847         MRI->createVirtualRegister(MRI->getRegClass(InReg));
2848     unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg));
2849     TII->materializeImmediate(*(LRegion->getEntry()),
2850 			      LRegion->getEntry()->getFirstTerminator(), DL,
2851 			      NewInReg, Region->getEntry()->getNumber());
2852     // Need to be careful about updating the registers inside the region.
2853     LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI);
2854     DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n");
2855     insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc,
2856                    InnerSelectReg, NewInReg,
2857                    LRegion->getRegionMRT()->getInnerOutputRegister());
2858     splitExit(LRegion);
2859     TII->convertNonUniformLoopRegion(NewSucc, LastMerge);
2860   }
2861 
2862   if (Region->isRoot()) {
2863     TII->insertReturn(*LastMerge);
2864   }
2865 
2866   DEBUG(Entry->getParent()->dump());
2867   DEBUG(LRegion->print(dbgs(), TRI));
2868   DEBUG(PHIInfo.dump(MRI));
2869 
2870   DEBUG(dbgs() << "===========If Region End===============\n");
2871 
2872   Region->setLinearizedRegion(LRegion);
2873   return true;
2874 }
2875 
2876 bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) {
2877   if (false && regionIsSimpleIf(Region)) {
2878     transformSimpleIfRegion(Region);
2879     return true;
2880   } else if (regionIsSequence(Region)) {
2881     fixupRegionExits(Region);
2882     return false;
2883   } else {
2884     structurizeComplexRegion(Region);
2885   }
2886   return false;
2887 }
2888 
2889 static int structurize_once = 0;
2890 
2891 bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region,
2892                                                 bool isTopRegion) {
2893   bool Changed = false;
2894 
2895   auto Children = Region->getChildren();
2896   for (auto CI : *Children) {
2897     if (CI->isRegion()) {
2898       Changed |= structurizeRegions(CI->getRegionMRT(), false);
2899     }
2900   }
2901 
2902   if (structurize_once < 2 || true) {
2903     Changed |= structurizeRegion(Region);
2904     structurize_once++;
2905   }
2906   return Changed;
2907 }
2908 
2909 void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) {
2910   DEBUG(dbgs() << "Fallthrough Map:\n");
2911   for (auto &MBBI : MF) {
2912     MachineBasicBlock *MBB = MBBI.getFallThrough();
2913     if (MBB != nullptr) {
2914       DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> "
2915                    << MBB->getNumber() << "\n");
2916     }
2917     FallthroughMap[&MBBI] = MBB;
2918   }
2919 }
2920 
2921 void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region,
2922                                                     unsigned SelectOut) {
2923   LinearizedRegion *LRegion = new LinearizedRegion();
2924   if (SelectOut) {
2925     LRegion->addLiveOut(SelectOut);
2926     DEBUG(dbgs() << "Add LiveOut (BBSelect): " << PrintReg(SelectOut, TRI)
2927                  << "\n");
2928   }
2929   LRegion->setRegionMRT(Region);
2930   Region->setLinearizedRegion(LRegion);
2931   LRegion->setParent(Region->getParent()
2932                          ? Region->getParent()->getLinearizedRegion()
2933                          : nullptr);
2934 }
2935 
2936 unsigned
2937 AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut,
2938                                                   MachineRegisterInfo *MRI,
2939                                                   const SIInstrInfo *TII) {
2940   if (MRT->isRegion()) {
2941     RegionMRT *Region = MRT->getRegionMRT();
2942     Region->setBBSelectRegOut(SelectOut);
2943     unsigned InnerSelectOut = createBBSelectReg(TII, MRI);
2944 
2945     // Fixme: Move linearization creation to the original spot
2946     createLinearizedRegion(Region, SelectOut);
2947 
2948     for (auto CI = Region->getChildren()->begin(),
2949               CE = Region->getChildren()->end();
2950          CI != CE; ++CI) {
2951       InnerSelectOut =
2952           initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII);
2953     }
2954     MRT->setBBSelectRegIn(InnerSelectOut);
2955     return InnerSelectOut;
2956   } else {
2957     MRT->setBBSelectRegOut(SelectOut);
2958     unsigned NewSelectIn = createBBSelectReg(TII, MRI);
2959     MRT->setBBSelectRegIn(NewSelectIn);
2960     return NewSelectIn;
2961   }
2962 }
2963 
2964 static void checkRegOnlyPHIInputs(MachineFunction &MF) {
2965   for (auto &MBBI : MF) {
2966     for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(),
2967                                            E = MBBI.instr_end();
2968          I != E; ++I) {
2969       MachineInstr &Instr = *I;
2970       if (Instr.isPHI()) {
2971         int numPreds = getPHINumInputs(Instr);
2972         for (int i = 0; i < numPreds; ++i) {
2973           assert(Instr.getOperand(i * 2 + 1).isReg() &&
2974                  "PHI Operand not a register");
2975         }
2976       }
2977     }
2978   }
2979 }
2980 
2981 
2982 INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
2983                       "AMDGPU Machine CFG Structurizer", false, false)
2984 INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass)
2985 INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer",
2986                     "AMDGPU Machine CFG Structurizer", false, false)
2987 
2988 char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID;
2989 
2990 
2991 bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) {
2992   const SISubtarget &ST = MF.getSubtarget<SISubtarget>();
2993   const SIInstrInfo *TII = ST.getInstrInfo();
2994   TRI = ST.getRegisterInfo();
2995   MRI = &(MF.getRegInfo());
2996   initFallthroughMap(MF);
2997 
2998   checkRegOnlyPHIInputs(MF);
2999   DEBUG(dbgs() << "----STRUCTURIZER START----\n");
3000   DEBUG(MF.dump());
3001 
3002   Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo());
3003   DEBUG(Regions->dump());
3004 
3005   RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI);
3006   setRegionMRT(RTree);
3007   initializeSelectRegisters(RTree, 0, MRI, TII);
3008   DEBUG(RTree->dump(TRI));
3009   bool result = structurizeRegions(RTree, true);
3010   delete RTree;
3011   DEBUG(dbgs() << "----STRUCTURIZER END----\n");
3012   initFallthroughMap(MF);
3013   return result;
3014 }
3015 
3016 FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() {
3017   return new AMDGPUMachineCFGStructurizer();
3018 }
3019 
3020