1 //===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the machine instruction level CFG structurizer pass. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "SIInstrInfo.h" 16 #include "AMDGPUSubtarget.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/PostOrderIterator.h" 19 #include "llvm/ADT/SetVector.h" 20 #include "llvm/ADT/SmallPtrSet.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include "llvm/Analysis/CFG.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineFunctionPass.h" 25 #include "llvm/CodeGen/MachineInstr.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegionInfo.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/IR/DebugLoc.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Target/TargetInstrInfo.h" 33 #include "llvm/Target/TargetLowering.h" 34 #include "llvm/Target/TargetSubtargetInfo.h" 35 #include <tuple> 36 using namespace llvm; 37 38 #define DEBUG_TYPE "amdgpucfgstructurizer" 39 40 namespace { 41 class PHILinearizeDestIterator; 42 43 class PHILinearize { 44 friend class PHILinearizeDestIterator; 45 46 public: 47 typedef std::pair<unsigned, MachineBasicBlock *> PHISourceT; 48 49 private: 50 typedef DenseSet<PHISourceT> PHISourcesT; 51 typedef struct { 52 unsigned DestReg; 53 DebugLoc DL; 54 PHISourcesT Sources; 55 } PHIInfoElementT; 56 typedef SmallPtrSet<PHIInfoElementT *, 2> PHIInfoT; 57 PHIInfoT PHIInfo; 58 59 static unsigned phiInfoElementGetDest(PHIInfoElementT *Info); 60 static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef); 61 static DebugLoc phiInfoElementGetDebugLoc(PHIInfoElementT *Info); 62 static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info); 63 static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg, 64 MachineBasicBlock *SourceMBB); 65 static void phiInfoElementRemoveSource(PHIInfoElementT *Info, 66 unsigned SourceReg, 67 MachineBasicBlock *SourceMBB); 68 PHIInfoElementT *findPHIInfoElement(unsigned DestReg); 69 PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg, 70 MachineBasicBlock *SourceMBB); 71 72 public: 73 bool findSourcesFromMBB(MachineBasicBlock *SourceMBB, 74 SmallVector<unsigned, 4> &Sources); 75 void addDest(unsigned DestReg, const DebugLoc &DL); 76 void replaceDef(unsigned OldDestReg, unsigned NewDestReg); 77 void deleteDef(unsigned DestReg); 78 void addSource(unsigned DestReg, unsigned SourceReg, 79 MachineBasicBlock *SourceMBB); 80 void removeSource(unsigned DestReg, unsigned SourceReg, 81 MachineBasicBlock *SourceMBB = nullptr); 82 bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, 83 unsigned &DestReg); 84 bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr); 85 unsigned getNumSources(unsigned DestReg); 86 void dump(MachineRegisterInfo *MRI); 87 void clear(); 88 89 typedef PHISourcesT::iterator source_iterator; 90 typedef PHILinearizeDestIterator dest_iterator; 91 92 dest_iterator dests_begin(); 93 dest_iterator dests_end(); 94 95 source_iterator sources_begin(unsigned Reg); 96 source_iterator sources_end(unsigned Reg); 97 }; 98 99 class PHILinearizeDestIterator { 100 private: 101 PHILinearize::PHIInfoT::iterator Iter; 102 103 public: 104 unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); } 105 PHILinearizeDestIterator &operator++() { 106 ++Iter; 107 return *this; 108 } 109 bool operator==(const PHILinearizeDestIterator &I) const { 110 return I.Iter == Iter; 111 } 112 bool operator!=(const PHILinearizeDestIterator &I) const { 113 return I.Iter != Iter; 114 } 115 116 PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {} 117 }; 118 119 unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) { 120 return Info->DestReg; 121 } 122 123 void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info, 124 unsigned NewDef) { 125 Info->DestReg = NewDef; 126 } 127 128 DebugLoc PHILinearize::phiInfoElementGetDebugLoc(PHIInfoElementT *Info) { 129 return Info->DL; 130 } 131 132 PHILinearize::PHISourcesT & 133 PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) { 134 return Info->Sources; 135 } 136 137 void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info, 138 unsigned SourceReg, 139 MachineBasicBlock *SourceMBB) { 140 // Assertion ensures we don't use the same SourceMBB for the 141 // sources, because we cannot have different registers with 142 // identical predecessors, but we can have the same register for 143 // multiple predecessors. 144 for (auto SI : phiInfoElementGetSources(Info)) { 145 assert((SI.second != SourceMBB || SourceReg == SI.first)); 146 } 147 148 phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB)); 149 } 150 151 void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info, 152 unsigned SourceReg, 153 MachineBasicBlock *SourceMBB) { 154 auto &Sources = phiInfoElementGetSources(Info); 155 SmallVector<PHISourceT, 4> ElimiatedSources; 156 for (auto SI : Sources) { 157 if (SI.first == SourceReg && 158 (SI.second == nullptr || SI.second == SourceMBB)) { 159 ElimiatedSources.push_back(PHISourceT(SI.first, SI.second)); 160 } 161 } 162 163 for (auto &Source : ElimiatedSources) { 164 Sources.erase(Source); 165 } 166 } 167 168 PHILinearize::PHIInfoElementT * 169 PHILinearize::findPHIInfoElement(unsigned DestReg) { 170 for (auto I : PHIInfo) { 171 if (phiInfoElementGetDest(I) == DestReg) { 172 return I; 173 } 174 } 175 return nullptr; 176 } 177 178 PHILinearize::PHIInfoElementT * 179 PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg, 180 MachineBasicBlock *SourceMBB) { 181 for (auto I : PHIInfo) { 182 for (auto SI : phiInfoElementGetSources(I)) { 183 if (SI.first == SourceReg && 184 (SI.second == nullptr || SI.second == SourceMBB)) { 185 return I; 186 } 187 } 188 } 189 return nullptr; 190 } 191 192 bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB, 193 SmallVector<unsigned, 4> &Sources) { 194 bool FoundSource = false; 195 for (auto I : PHIInfo) { 196 for (auto SI : phiInfoElementGetSources(I)) { 197 if (SI.second == SourceMBB) { 198 FoundSource = true; 199 Sources.push_back(SI.first); 200 } 201 } 202 } 203 return FoundSource; 204 } 205 206 void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) { 207 assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists"); 208 PHISourcesT EmptySet; 209 PHIInfoElementT *NewElement = new PHIInfoElementT(); 210 NewElement->DestReg = DestReg; 211 NewElement->DL = DL; 212 NewElement->Sources = EmptySet; 213 PHIInfo.insert(NewElement); 214 } 215 216 void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) { 217 phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg); 218 } 219 220 void PHILinearize::deleteDef(unsigned DestReg) { 221 PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg); 222 PHIInfo.erase(InfoElement); 223 delete InfoElement; 224 } 225 226 void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg, 227 MachineBasicBlock *SourceMBB) { 228 phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); 229 } 230 231 void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg, 232 MachineBasicBlock *SourceMBB) { 233 phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); 234 } 235 236 bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, 237 unsigned &DestReg) { 238 PHIInfoElementT *InfoElement = 239 findPHIInfoElementFromSource(SourceReg, SourceMBB); 240 if (InfoElement != nullptr) { 241 DestReg = phiInfoElementGetDest(InfoElement); 242 return true; 243 } 244 return false; 245 } 246 247 bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) { 248 unsigned DestReg; 249 return findDest(Reg, SourceMBB, DestReg); 250 } 251 252 unsigned PHILinearize::getNumSources(unsigned DestReg) { 253 return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size(); 254 } 255 256 void PHILinearize::dump(MachineRegisterInfo *MRI) { 257 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); 258 dbgs() << "=PHIInfo Start=\n"; 259 for (auto PII : this->PHIInfo) { 260 PHIInfoElementT &Element = *PII; 261 dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI) 262 << " Sources: {"; 263 for (auto &SI : Element.Sources) { 264 dbgs() << PrintReg(SI.first, TRI) << "(BB#" 265 << SI.second->getNumber() << "),"; 266 } 267 dbgs() << "}\n"; 268 } 269 dbgs() << "=PHIInfo End=\n"; 270 } 271 272 void PHILinearize::clear() { PHIInfo = PHIInfoT(); } 273 274 PHILinearize::dest_iterator PHILinearize::dests_begin() { 275 return PHILinearizeDestIterator(PHIInfo.begin()); 276 } 277 278 PHILinearize::dest_iterator PHILinearize::dests_end() { 279 return PHILinearizeDestIterator(PHIInfo.end()); 280 } 281 282 PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) { 283 auto InfoElement = findPHIInfoElement(Reg); 284 return phiInfoElementGetSources(InfoElement).begin(); 285 } 286 PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) { 287 auto InfoElement = findPHIInfoElement(Reg); 288 return phiInfoElementGetSources(InfoElement).end(); 289 } 290 291 class RegionMRT; 292 class MBBMRT; 293 294 static unsigned getPHINumInputs(MachineInstr &PHI) { 295 assert(PHI.isPHI()); 296 return (PHI.getNumOperands() - 1) / 2; 297 } 298 299 static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) { 300 assert(PHI.isPHI()); 301 return PHI.getOperand(Index * 2 + 2).getMBB(); 302 } 303 304 static void setPhiPred(MachineInstr &PHI, unsigned Index, 305 MachineBasicBlock *NewPred) { 306 PHI.getOperand(Index * 2 + 2).setMBB(NewPred); 307 } 308 309 static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) { 310 assert(PHI.isPHI()); 311 return PHI.getOperand(Index * 2 + 1).getReg(); 312 } 313 314 static unsigned getPHIDestReg(MachineInstr &PHI) { 315 assert(PHI.isPHI()); 316 return PHI.getOperand(0).getReg(); 317 } 318 319 class LinearizedRegion { 320 protected: 321 MachineBasicBlock *Entry; 322 // The exit block is part of the region, and is the last 323 // merge block before exiting the region. 324 MachineBasicBlock *Exit; 325 DenseSet<unsigned> LiveOuts; 326 SmallPtrSet<MachineBasicBlock *, 1> MBBs; 327 bool HasLoop; 328 LinearizedRegion *Parent; 329 RegionMRT *RMRT; 330 331 void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, 332 MachineInstr *DefInstr, const MachineRegisterInfo *MRI, 333 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 334 335 void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, 336 MachineInstr *DefInstr, 337 const MachineRegisterInfo *MRI, 338 const TargetRegisterInfo *TRI, 339 PHILinearize &PHIInfo); 340 341 void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 342 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, 343 RegionMRT *TopRegion); 344 345 void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 346 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 347 348 void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI, 349 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, 350 RegionMRT *TopRegion = nullptr); 351 352 public: 353 void setRegionMRT(RegionMRT *Region) { RMRT = Region; } 354 355 RegionMRT *getRegionMRT() { return RMRT; } 356 357 void setParent(LinearizedRegion *P) { Parent = P; } 358 359 LinearizedRegion *getParent() { return Parent; } 360 361 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr); 362 363 void setBBSelectRegIn(unsigned Reg); 364 365 unsigned getBBSelectRegIn(); 366 367 void setBBSelectRegOut(unsigned Reg, bool IsLiveOut); 368 369 unsigned getBBSelectRegOut(); 370 371 void setHasLoop(bool Value); 372 373 bool getHasLoop(); 374 375 void addLiveOut(unsigned VReg); 376 377 void removeLiveOut(unsigned Reg); 378 379 void replaceLiveOut(unsigned OldReg, unsigned NewReg); 380 381 void replaceRegister(unsigned Register, unsigned NewRegister, 382 MachineRegisterInfo *MRI, bool ReplaceInside, 383 bool ReplaceOutside, bool IncludeLoopPHIs); 384 385 void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister, 386 bool IncludeLoopPHIs, 387 MachineRegisterInfo *MRI); 388 389 void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister, 390 bool IncludeLoopPHIs, 391 MachineRegisterInfo *MRI); 392 393 DenseSet<unsigned> *getLiveOuts(); 394 395 void setEntry(MachineBasicBlock *NewEntry); 396 397 MachineBasicBlock *getEntry(); 398 399 void setExit(MachineBasicBlock *NewExit); 400 401 MachineBasicBlock *getExit(); 402 403 void addMBB(MachineBasicBlock *MBB); 404 405 void addMBBs(LinearizedRegion *InnerRegion); 406 407 bool contains(MachineBasicBlock *MBB); 408 409 bool isLiveOut(unsigned Reg); 410 411 bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI); 412 413 void removeFalseRegisterKills(MachineRegisterInfo *MRI); 414 415 void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI, 416 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 417 418 LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 419 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 420 421 LinearizedRegion(); 422 423 ~LinearizedRegion(); 424 }; 425 426 class MRT { 427 protected: 428 RegionMRT *Parent; 429 unsigned BBSelectRegIn; 430 unsigned BBSelectRegOut; 431 432 public: 433 unsigned getBBSelectRegIn() { return BBSelectRegIn; } 434 435 unsigned getBBSelectRegOut() { return BBSelectRegOut; } 436 437 void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; } 438 439 void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; } 440 441 virtual RegionMRT *getRegionMRT() { return nullptr; } 442 443 virtual MBBMRT *getMBBMRT() { return nullptr; } 444 445 bool isRegion() { return getRegionMRT() != nullptr; } 446 447 bool isMBB() { return getMBBMRT() != nullptr; } 448 449 bool isRoot() { return Parent == nullptr; } 450 451 void setParent(RegionMRT *Region) { Parent = Region; } 452 453 RegionMRT *getParent() { return Parent; } 454 455 static MachineBasicBlock * 456 initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, 457 DenseMap<MachineRegion *, RegionMRT *> &RegionMap); 458 459 static RegionMRT *buildMRT(MachineFunction &MF, 460 const MachineRegionInfo *RegionInfo, 461 const SIInstrInfo *TII, 462 MachineRegisterInfo *MRI); 463 464 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0; 465 466 void dumpDepth(int depth) { 467 for (int i = depth; i > 0; --i) { 468 dbgs() << " "; 469 } 470 } 471 472 virtual ~MRT() {} 473 }; 474 475 class MBBMRT : public MRT { 476 MachineBasicBlock *MBB; 477 478 public: 479 virtual MBBMRT *getMBBMRT() { return this; } 480 481 MachineBasicBlock *getMBB() { return MBB; } 482 483 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { 484 dumpDepth(depth); 485 dbgs() << "MBB: " << getMBB()->getNumber(); 486 dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); 487 dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; 488 } 489 490 MBBMRT(MachineBasicBlock *BB) : MBB(BB) { 491 setParent(nullptr); 492 setBBSelectRegOut(0); 493 setBBSelectRegIn(0); 494 } 495 }; 496 497 class RegionMRT : public MRT { 498 protected: 499 MachineRegion *Region; 500 LinearizedRegion *LRegion; 501 MachineBasicBlock *Succ; 502 503 SetVector<MRT *> Children; 504 505 public: 506 virtual RegionMRT *getRegionMRT() { return this; } 507 508 void setLinearizedRegion(LinearizedRegion *LinearizeRegion) { 509 LRegion = LinearizeRegion; 510 } 511 512 LinearizedRegion *getLinearizedRegion() { return LRegion; } 513 514 MachineRegion *getMachineRegion() { return Region; } 515 516 unsigned getInnerOutputRegister() { 517 return (*(Children.begin()))->getBBSelectRegOut(); 518 } 519 520 void addChild(MRT *Tree) { Children.insert(Tree); } 521 522 SetVector<MRT *> *getChildren() { return &Children; } 523 524 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { 525 dumpDepth(depth); 526 dbgs() << "Region: " << (void *)Region; 527 dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); 528 dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; 529 530 dumpDepth(depth); 531 if (getSucc()) 532 dbgs() << "Succ: " << getSucc()->getNumber() << "\n"; 533 else 534 dbgs() << "Succ: none \n"; 535 for (auto MRTI : Children) { 536 MRTI->dump(TRI, depth + 1); 537 } 538 } 539 540 MRT *getEntryTree() { return Children.back(); } 541 542 MRT *getExitTree() { return Children.front(); } 543 544 MachineBasicBlock *getEntry() { 545 MRT *Tree = Children.back(); 546 return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry() 547 : Tree->getMBBMRT()->getMBB(); 548 } 549 550 MachineBasicBlock *getExit() { 551 MRT *Tree = Children.front(); 552 return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit() 553 : Tree->getMBBMRT()->getMBB(); 554 } 555 556 void setSucc(MachineBasicBlock *MBB) { Succ = MBB; } 557 558 MachineBasicBlock *getSucc() { return Succ; } 559 560 bool contains(MachineBasicBlock *MBB) { 561 for (auto CI : Children) { 562 if (CI->isMBB()) { 563 if (MBB == CI->getMBBMRT()->getMBB()) { 564 return true; 565 } 566 } else { 567 if (CI->getRegionMRT()->contains(MBB)) { 568 return true; 569 } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr && 570 CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) { 571 return true; 572 } 573 } 574 } 575 return false; 576 } 577 578 void replaceLiveOutReg(unsigned Register, unsigned NewRegister) { 579 LinearizedRegion *LRegion = getLinearizedRegion(); 580 LRegion->replaceLiveOut(Register, NewRegister); 581 for (auto &CI : Children) { 582 if (CI->isRegion()) { 583 CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister); 584 } 585 } 586 } 587 588 RegionMRT(MachineRegion *MachineRegion) 589 : Region(MachineRegion), LRegion(nullptr), Succ(nullptr) { 590 setParent(nullptr); 591 setBBSelectRegOut(0); 592 setBBSelectRegIn(0); 593 } 594 595 virtual ~RegionMRT() { 596 if (LRegion) { 597 delete LRegion; 598 } 599 600 for (auto CI : Children) { 601 delete &(*CI); 602 } 603 } 604 }; 605 606 static unsigned createBBSelectReg(const SIInstrInfo *TII, 607 MachineRegisterInfo *MRI) { 608 return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32)); 609 } 610 611 MachineBasicBlock * 612 MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, 613 DenseMap<MachineRegion *, RegionMRT *> &RegionMap) { 614 for (auto &MFI : MF) { 615 MachineBasicBlock *ExitMBB = &MFI; 616 if (ExitMBB->succ_size() == 0) { 617 return ExitMBB; 618 } 619 } 620 llvm_unreachable("CFG has no exit block"); 621 return nullptr; 622 } 623 624 RegionMRT *MRT::buildMRT(MachineFunction &MF, 625 const MachineRegionInfo *RegionInfo, 626 const SIInstrInfo *TII, MachineRegisterInfo *MRI) { 627 SmallPtrSet<MachineRegion *, 4> PlacedRegions; 628 DenseMap<MachineRegion *, RegionMRT *> RegionMap; 629 MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion(); 630 RegionMRT *Result = new RegionMRT(TopLevelRegion); 631 RegionMap[TopLevelRegion] = Result; 632 633 // Insert the exit block first, we need it to be the merge node 634 // for the top level region. 635 MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap); 636 637 unsigned BBSelectRegIn = createBBSelectReg(TII, MRI); 638 MBBMRT *ExitMRT = new MBBMRT(Exit); 639 RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT); 640 ExitMRT->setBBSelectRegIn(BBSelectRegIn); 641 642 for (auto MBBI : post_order(&(MF.front()))) { 643 MachineBasicBlock *MBB = &(*MBBI); 644 645 // Skip Exit since we already added it 646 if (MBB == Exit) { 647 continue; 648 } 649 650 DEBUG(dbgs() << "Visiting BB#" << MBB->getNumber() << "\n"); 651 MBBMRT *NewMBB = new MBBMRT(MBB); 652 MachineRegion *Region = RegionInfo->getRegionFor(MBB); 653 654 // Ensure we have the MRT region 655 if (RegionMap.count(Region) == 0) { 656 RegionMRT *NewMRTRegion = new RegionMRT(Region); 657 RegionMap[Region] = NewMRTRegion; 658 659 // Ensure all parents are in the RegionMap 660 MachineRegion *Parent = Region->getParent(); 661 while (RegionMap.count(Parent) == 0) { 662 RegionMRT *NewMRTParent = new RegionMRT(Parent); 663 NewMRTParent->addChild(NewMRTRegion); 664 NewMRTRegion->setParent(NewMRTParent); 665 RegionMap[Parent] = NewMRTParent; 666 NewMRTRegion = NewMRTParent; 667 Parent = Parent->getParent(); 668 } 669 RegionMap[Parent]->addChild(NewMRTRegion); 670 NewMRTRegion->setParent(RegionMap[Parent]); 671 } 672 673 // Add MBB to Region MRT 674 RegionMap[Region]->addChild(NewMBB); 675 NewMBB->setParent(RegionMap[Region]); 676 RegionMap[Region]->setSucc(Region->getExit()); 677 } 678 return Result; 679 } 680 681 void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, 682 MachineInstr *DefInstr, 683 const MachineRegisterInfo *MRI, 684 const TargetRegisterInfo *TRI, 685 PHILinearize &PHIInfo) { 686 if (TRI->isVirtualRegister(Reg)) { 687 DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); 688 // If this is a source register to a PHI we are chaining, it 689 // must be live out. 690 if (PHIInfo.isSource(Reg)) { 691 DEBUG(dbgs() << "Add LiveOut (PHI): " << PrintReg(Reg, TRI) << "\n"); 692 addLiveOut(Reg); 693 } else { 694 // If this is live out of the MBB 695 for (auto &UI : MRI->use_operands(Reg)) { 696 if (UI.getParent()->getParent() != MBB) { 697 DEBUG(dbgs() << "Add LiveOut (MBB BB#" << MBB->getNumber() 698 << "): " << PrintReg(Reg, TRI) << "\n"); 699 addLiveOut(Reg); 700 } else { 701 // If the use is in the same MBB we have to make sure 702 // it is after the def, otherwise it is live out in a loop 703 MachineInstr *UseInstr = UI.getParent(); 704 for (MachineBasicBlock::instr_iterator 705 MII = UseInstr->getIterator(), 706 MIE = UseInstr->getParent()->instr_end(); 707 MII != MIE; ++MII) { 708 if ((&(*MII)) == DefInstr) { 709 DEBUG(dbgs() << "Add LiveOut (Loop): " << PrintReg(Reg, TRI) 710 << "\n"); 711 addLiveOut(Reg); 712 } 713 } 714 } 715 } 716 } 717 } 718 } 719 720 void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, 721 MachineInstr *DefInstr, 722 const MachineRegisterInfo *MRI, 723 const TargetRegisterInfo *TRI, 724 PHILinearize &PHIInfo) { 725 if (TRI->isVirtualRegister(Reg)) { 726 DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); 727 for (auto &UI : MRI->use_operands(Reg)) { 728 if (!Region->contains(UI.getParent()->getParent())) { 729 DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region 730 << "): " << PrintReg(Reg, TRI) << "\n"); 731 addLiveOut(Reg); 732 } 733 } 734 } 735 } 736 737 void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, 738 const MachineRegisterInfo *MRI, 739 const TargetRegisterInfo *TRI, 740 PHILinearize &PHIInfo) { 741 DEBUG(dbgs() << "-Store Live Outs Begin (BB#" << MBB->getNumber() << ")-\n"); 742 for (auto &II : *MBB) { 743 for (auto &RI : II.defs()) { 744 storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo); 745 } 746 for (auto &IRI : II.implicit_operands()) { 747 if (IRI.isDef()) { 748 storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo); 749 } 750 } 751 } 752 753 // If we have a successor with a PHI, source coming from this MBB we have to 754 // add the register as live out 755 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 756 E = MBB->succ_end(); 757 SI != E; ++SI) { 758 for (auto &II : *(*SI)) { 759 if (II.isPHI()) { 760 MachineInstr &PHI = II; 761 int numPreds = getPHINumInputs(PHI); 762 for (int i = 0; i < numPreds; ++i) { 763 if (getPHIPred(PHI, i) == MBB) { 764 unsigned PHIReg = getPHISourceReg(PHI, i); 765 DEBUG(dbgs() << "Add LiveOut (PhiSource BB#" << MBB->getNumber() 766 << " -> BB#" << (*SI)->getNumber() 767 << "): " << PrintReg(PHIReg, TRI) << "\n"); 768 addLiveOut(PHIReg); 769 } 770 } 771 } 772 } 773 } 774 775 DEBUG(dbgs() << "-Store Live Outs Endn-\n"); 776 } 777 778 void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB, 779 const MachineRegisterInfo *MRI, 780 const TargetRegisterInfo *TRI, 781 PHILinearize &PHIInfo, 782 RegionMRT *TopRegion) { 783 for (auto &II : *MBB) { 784 for (auto &RI : II.defs()) { 785 storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI, 786 PHIInfo); 787 } 788 for (auto &IRI : II.implicit_operands()) { 789 if (IRI.isDef()) { 790 storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI, 791 TRI, PHIInfo); 792 } 793 } 794 } 795 } 796 797 void LinearizedRegion::storeLiveOuts(RegionMRT *Region, 798 const MachineRegisterInfo *MRI, 799 const TargetRegisterInfo *TRI, 800 PHILinearize &PHIInfo, 801 RegionMRT *CurrentTopRegion) { 802 MachineBasicBlock *Exit = Region->getSucc(); 803 804 RegionMRT *TopRegion = 805 CurrentTopRegion == nullptr ? Region : CurrentTopRegion; 806 807 // Check if exit is end of function, if so, no live outs. 808 if (Exit == nullptr) 809 return; 810 811 auto Children = Region->getChildren(); 812 for (auto CI : *Children) { 813 if (CI->isMBB()) { 814 auto MBB = CI->getMBBMRT()->getMBB(); 815 storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion); 816 } else { 817 LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion(); 818 // We should be limited to only store registers that are live out from the 819 // lineaized region 820 for (auto MBBI : SubRegion->MBBs) { 821 storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion); 822 } 823 } 824 } 825 826 if (CurrentTopRegion == nullptr) { 827 auto Succ = Region->getSucc(); 828 for (auto &II : *Succ) { 829 if (II.isPHI()) { 830 MachineInstr &PHI = II; 831 int numPreds = getPHINumInputs(PHI); 832 for (int i = 0; i < numPreds; ++i) { 833 if (Region->contains(getPHIPred(PHI, i))) { 834 unsigned PHIReg = getPHISourceReg(PHI, i); 835 DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region 836 << "): " << PrintReg(PHIReg, TRI) << "\n"); 837 addLiveOut(PHIReg); 838 } 839 } 840 } 841 } 842 } 843 } 844 845 void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { 846 OS << "Linearized Region {"; 847 bool IsFirst = true; 848 for (const auto &MBB : MBBs) { 849 if (IsFirst) { 850 IsFirst = false; 851 } else { 852 OS << " ,"; 853 } 854 OS << MBB->getNumber(); 855 } 856 OS << "} (" << Entry->getNumber() << ", " 857 << (Exit == nullptr ? -1 : Exit->getNumber()) 858 << "): In:" << PrintReg(getBBSelectRegIn(), TRI) 859 << " Out:" << PrintReg(getBBSelectRegOut(), TRI) << " {"; 860 for (auto &LI : LiveOuts) { 861 OS << PrintReg(LI, TRI) << " "; 862 } 863 OS << "} \n"; 864 } 865 866 unsigned LinearizedRegion::getBBSelectRegIn() { 867 return getRegionMRT()->getBBSelectRegIn(); 868 } 869 870 unsigned LinearizedRegion::getBBSelectRegOut() { 871 return getRegionMRT()->getBBSelectRegOut(); 872 } 873 874 void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; } 875 876 bool LinearizedRegion::getHasLoop() { return HasLoop; } 877 878 void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); } 879 880 void LinearizedRegion::removeLiveOut(unsigned Reg) { 881 if (isLiveOut(Reg)) 882 LiveOuts.erase(Reg); 883 } 884 885 void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) { 886 if (isLiveOut(OldReg)) { 887 removeLiveOut(OldReg); 888 addLiveOut(NewReg); 889 } 890 } 891 892 void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister, 893 MachineRegisterInfo *MRI, 894 bool ReplaceInside, bool ReplaceOutside, 895 bool IncludeLoopPHI) { 896 assert(Register != NewRegister && "Cannot replace a reg with itself"); 897 898 DEBUG(dbgs() << "Pepareing to replace register (region): " 899 << PrintReg(Register, MRI->getTargetRegisterInfo()) << " with " 900 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); 901 902 // If we are replacing outside, we also need to update the LiveOuts 903 if (ReplaceOutside && 904 (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) { 905 LinearizedRegion *Current = this; 906 while (Current != nullptr && Current->getEntry() != nullptr) { 907 DEBUG(dbgs() << "Region before register replace\n"); 908 DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); 909 Current->replaceLiveOut(Register, NewRegister); 910 DEBUG(dbgs() << "Region after register replace\n"); 911 DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); 912 Current = Current->getParent(); 913 } 914 } 915 916 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), 917 E = MRI->reg_end(); 918 I != E;) { 919 MachineOperand &O = *I; 920 ++I; 921 922 // We don't rewrite defs. 923 if (O.isDef()) 924 continue; 925 926 bool IsInside = contains(O.getParent()->getParent()); 927 bool IsLoopPHI = IsInside && (O.getParent()->isPHI() && 928 O.getParent()->getParent() == getEntry()); 929 bool ShouldReplace = (IsInside && ReplaceInside) || 930 (!IsInside && ReplaceOutside) || 931 (IncludeLoopPHI && IsLoopPHI); 932 if (ShouldReplace) { 933 934 if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { 935 DEBUG(dbgs() << "Trying to substitute physical register: " 936 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 937 << "\n"); 938 llvm_unreachable("Cannot substitute physical registers"); 939 } else { 940 DEBUG(dbgs() << "Replacing register (region): " 941 << PrintReg(Register, MRI->getTargetRegisterInfo()) 942 << " with " 943 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 944 << "\n"); 945 O.setReg(NewRegister); 946 } 947 } 948 } 949 } 950 951 void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register, 952 unsigned NewRegister, 953 bool IncludeLoopPHIs, 954 MachineRegisterInfo *MRI) { 955 replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs); 956 } 957 958 void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register, 959 unsigned NewRegister, 960 bool IncludeLoopPHIs, 961 MachineRegisterInfo *MRI) { 962 replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs); 963 } 964 965 DenseSet<unsigned> *LinearizedRegion::getLiveOuts() { return &LiveOuts; } 966 967 void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) { 968 Entry = NewEntry; 969 } 970 971 MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; } 972 973 void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; } 974 975 MachineBasicBlock *LinearizedRegion::getExit() { return Exit; } 976 977 void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); } 978 979 void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) { 980 for (const auto &MBB : InnerRegion->MBBs) { 981 addMBB(MBB); 982 } 983 } 984 985 bool LinearizedRegion::contains(MachineBasicBlock *MBB) { 986 return MBBs.count(MBB) == 1; 987 } 988 989 bool LinearizedRegion::isLiveOut(unsigned Reg) { 990 return LiveOuts.count(Reg) == 1; 991 } 992 993 bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) { 994 return MRI->def_begin(Reg) == MRI->def_end(); 995 } 996 997 // After the code has been structurized, what was flagged as kills 998 // before are no longer register kills. 999 void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { 1000 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); 1001 for (auto MBBI : MBBs) { 1002 MachineBasicBlock *MBB = MBBI; 1003 for (auto &II : *MBB) { 1004 for (auto &RI : II.uses()) { 1005 if (RI.isReg()) { 1006 unsigned Reg = RI.getReg(); 1007 if (TRI->isVirtualRegister(Reg)) { 1008 if (hasNoDef(Reg, MRI)) 1009 continue; 1010 if (!MRI->hasOneDef(Reg)) { 1011 DEBUG(this->getEntry()->getParent()->dump()); 1012 DEBUG(dbgs() << PrintReg(Reg, TRI) << "\n"); 1013 } 1014 1015 if (MRI->def_begin(Reg) == MRI->def_end()) { 1016 DEBUG(dbgs() << "Register " 1017 << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1018 << " has NO defs\n"); 1019 } else if (!MRI->hasOneDef(Reg)) { 1020 DEBUG(dbgs() << "Register " 1021 << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1022 << " has multiple defs\n"); 1023 } 1024 1025 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); 1026 MachineOperand *Def = &(*(MRI->def_begin(Reg))); 1027 MachineOperand *UseOperand = &(RI); 1028 bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB; 1029 if (UseIsOutsideDefMBB && UseOperand->isKill()) { 1030 DEBUG(dbgs() << "Removing kill flag on register: " 1031 << PrintReg(Reg, TRI) << "\n"); 1032 UseOperand->setIsKill(false); 1033 } 1034 } 1035 } 1036 } 1037 } 1038 } 1039 } 1040 1041 void LinearizedRegion::initLiveOut(RegionMRT *Region, 1042 const MachineRegisterInfo *MRI, 1043 const TargetRegisterInfo *TRI, 1044 PHILinearize &PHIInfo) { 1045 storeLiveOuts(Region, MRI, TRI, PHIInfo); 1046 } 1047 1048 LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB, 1049 const MachineRegisterInfo *MRI, 1050 const TargetRegisterInfo *TRI, 1051 PHILinearize &PHIInfo) { 1052 setEntry(MBB); 1053 setExit(MBB); 1054 storeLiveOuts(MBB, MRI, TRI, PHIInfo); 1055 MBBs.insert(MBB); 1056 Parent = nullptr; 1057 } 1058 1059 LinearizedRegion::LinearizedRegion() { 1060 setEntry(nullptr); 1061 setExit(nullptr); 1062 Parent = nullptr; 1063 } 1064 1065 LinearizedRegion::~LinearizedRegion() {} 1066 1067 class AMDGPUMachineCFGStructurizer : public MachineFunctionPass { 1068 private: 1069 const MachineRegionInfo *Regions; 1070 const SIInstrInfo *TII; 1071 const TargetRegisterInfo *TRI; 1072 MachineRegisterInfo *MRI; 1073 unsigned BBSelectRegister; 1074 PHILinearize PHIInfo; 1075 DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap; 1076 1077 void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI, 1078 SmallVector<unsigned, 2> &RegionIndices); 1079 void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, 1080 SmallVector<unsigned, 2> &RegionIndices); 1081 void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, 1082 SmallVector<unsigned, 2> &PHINonRegionIndices); 1083 1084 void storePHILinearizationInfoDest( 1085 unsigned LDestReg, MachineInstr &PHI, 1086 SmallVector<unsigned, 2> *RegionIndices = nullptr); 1087 1088 unsigned storePHILinearizationInfo(MachineInstr &PHI, 1089 SmallVector<unsigned, 2> *RegionIndices); 1090 1091 void extractKilledPHIs(MachineBasicBlock *MBB); 1092 1093 bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices, 1094 unsigned *ReplaceReg); 1095 1096 bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1097 MachineBasicBlock *SourceMBB, 1098 SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg); 1099 1100 void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1101 MachineBasicBlock *LastMerge, 1102 SmallVector<unsigned, 2> &PHIRegionIndices); 1103 void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1104 MachineBasicBlock *IfMBB, 1105 SmallVector<unsigned, 2> &PHIRegionIndices); 1106 void replaceLiveOutRegs(MachineInstr &PHI, 1107 SmallVector<unsigned, 2> &PHIRegionIndices, 1108 unsigned CombinedSourceReg, 1109 LinearizedRegion *LRegion); 1110 void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge, 1111 MachineInstr &PHI, LinearizedRegion *LRegion); 1112 1113 void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge, 1114 LinearizedRegion *LRegion); 1115 void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB, 1116 MachineInstr &PHI); 1117 void rewriteRegionEntryPHIs(LinearizedRegion *Region, 1118 MachineBasicBlock *IfMBB); 1119 1120 bool regionIsSimpleIf(RegionMRT *Region); 1121 1122 void transformSimpleIfRegion(RegionMRT *Region); 1123 1124 void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II); 1125 1126 void insertUnconditionalBranch(MachineBasicBlock *MBB, 1127 MachineBasicBlock *Dest, 1128 const DebugLoc &DL = DebugLoc()); 1129 1130 MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region); 1131 1132 void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1133 MachineBasicBlock *MergeBB, unsigned DestRegister, 1134 unsigned IfSourceRegister, unsigned CodeSourceRegister, 1135 bool IsUndefIfSource = false); 1136 1137 MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB, 1138 MachineBasicBlock *CodeBBStart, 1139 MachineBasicBlock *CodeBBEnd, 1140 MachineBasicBlock *SelectBB, unsigned IfReg, 1141 bool InheritPreds); 1142 1143 void prunePHIInfo(MachineBasicBlock *MBB); 1144 void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg); 1145 1146 void createEntryPHIs(LinearizedRegion *CurrentRegion); 1147 void resolvePHIInfos(MachineBasicBlock *FunctionEntry); 1148 1149 void replaceRegisterWith(unsigned Register, unsigned NewRegister); 1150 1151 MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB, 1152 MachineBasicBlock *CodeBB, 1153 LinearizedRegion *LRegion, 1154 unsigned BBSelectRegIn, 1155 unsigned BBSelectRegOut); 1156 1157 MachineBasicBlock * 1158 createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion, 1159 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, 1160 unsigned BBSelectRegIn, unsigned BBSelectRegOut); 1161 void ensureCondIsNotKilled(SmallVector<MachineOperand, 1> Cond); 1162 1163 void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, 1164 MachineBasicBlock *MergeBB, 1165 unsigned BBSelectReg); 1166 1167 MachineInstr *getDefInstr(unsigned Reg); 1168 void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1169 MachineBasicBlock *MergeBB, 1170 LinearizedRegion *InnerRegion, unsigned DestReg, 1171 unsigned SourceReg); 1172 bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion, 1173 unsigned Register); 1174 void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1175 MachineBasicBlock *MergeBB, 1176 LinearizedRegion *InnerRegion, 1177 LinearizedRegion *LRegion); 1178 1179 void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry, 1180 MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion); 1181 void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc, 1182 LinearizedRegion *LRegion); 1183 1184 MachineBasicBlock *splitExit(LinearizedRegion *LRegion); 1185 1186 MachineBasicBlock *splitEntry(LinearizedRegion *LRegion); 1187 1188 LinearizedRegion *initLinearizedRegion(RegionMRT *Region); 1189 1190 bool structurizeComplexRegion(RegionMRT *Region); 1191 1192 bool structurizeRegion(RegionMRT *Region); 1193 1194 bool structurizeRegions(RegionMRT *Region, bool isTopRegion); 1195 1196 public: 1197 static char ID; 1198 1199 void getAnalysisUsage(AnalysisUsage &AU) const override { 1200 AU.addRequired<MachineRegionInfoPass>(); 1201 MachineFunctionPass::getAnalysisUsage(AU); 1202 } 1203 1204 AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) { 1205 initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); 1206 } 1207 1208 void initFallthroughMap(MachineFunction &MF); 1209 1210 void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut); 1211 1212 unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg, 1213 MachineRegisterInfo *MRI, 1214 const SIInstrInfo *TII); 1215 1216 RegionMRT *RMRT; 1217 void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; } 1218 1219 RegionMRT *getRegionMRT() { return RMRT; } 1220 1221 bool runOnMachineFunction(MachineFunction &MF) override; 1222 }; 1223 } 1224 1225 char AMDGPUMachineCFGStructurizer::ID = 0; 1226 1227 bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) { 1228 MachineBasicBlock *Entry = Region->getEntry(); 1229 MachineBasicBlock *Succ = Region->getSucc(); 1230 bool FoundBypass = false; 1231 bool FoundIf = false; 1232 1233 if (Entry->succ_size() != 2) { 1234 return false; 1235 } 1236 1237 for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), 1238 E = Entry->succ_end(); 1239 SI != E; ++SI) { 1240 MachineBasicBlock *Current = *SI; 1241 1242 if (Current == Succ) { 1243 FoundBypass = true; 1244 } else if ((Current->succ_size() == 1) && 1245 *(Current->succ_begin()) == Succ) { 1246 FoundIf = true; 1247 } 1248 } 1249 1250 return FoundIf && FoundBypass; 1251 } 1252 1253 void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) { 1254 MachineBasicBlock *Entry = Region->getEntry(); 1255 MachineBasicBlock *Exit = Region->getExit(); 1256 TII->convertNonUniformIfRegion(Entry, Exit); 1257 } 1258 1259 static void fixMBBTerminator(MachineBasicBlock *MBB) { 1260 1261 if (MBB->succ_size() == 1) { 1262 auto *Succ = *(MBB->succ_begin()); 1263 for (auto &TI : MBB->terminators()) { 1264 for (auto &UI : TI.uses()) { 1265 if (UI.isMBB() && UI.getMBB() != Succ) { 1266 UI.setMBB(Succ); 1267 } 1268 } 1269 } 1270 } 1271 } 1272 1273 static void fixRegionTerminator(RegionMRT *Region) { 1274 MachineBasicBlock *InternalSucc = nullptr; 1275 MachineBasicBlock *ExternalSucc = nullptr; 1276 LinearizedRegion *LRegion = Region->getLinearizedRegion(); 1277 auto Exit = LRegion->getExit(); 1278 1279 SmallPtrSet<MachineBasicBlock *, 2> Successors; 1280 for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(), 1281 SE = Exit->succ_end(); 1282 SI != SE; ++SI) { 1283 MachineBasicBlock *Succ = *SI; 1284 if (LRegion->contains(Succ)) { 1285 // Do not allow re-assign 1286 assert(InternalSucc == nullptr); 1287 InternalSucc = Succ; 1288 } else { 1289 // Do not allow re-assign 1290 assert(ExternalSucc == nullptr); 1291 ExternalSucc = Succ; 1292 } 1293 } 1294 1295 for (auto &TI : Exit->terminators()) { 1296 for (auto &UI : TI.uses()) { 1297 if (UI.isMBB()) { 1298 auto Target = UI.getMBB(); 1299 if (Target != InternalSucc && Target != ExternalSucc) { 1300 UI.setMBB(ExternalSucc); 1301 } 1302 } 1303 } 1304 } 1305 } 1306 1307 // If a region region is just a sequence of regions (and the exit 1308 // block in the case of the top level region), we can simply skip 1309 // linearizing it, because it is already linear 1310 bool regionIsSequence(RegionMRT *Region) { 1311 auto Children = Region->getChildren(); 1312 for (auto CI : *Children) { 1313 if (!CI->isRegion()) { 1314 if (CI->getMBBMRT()->getMBB()->succ_size() > 1) { 1315 return false; 1316 } 1317 } 1318 } 1319 return true; 1320 } 1321 1322 void fixupRegionExits(RegionMRT *Region) { 1323 auto Children = Region->getChildren(); 1324 for (auto CI : *Children) { 1325 if (!CI->isRegion()) { 1326 fixMBBTerminator(CI->getMBBMRT()->getMBB()); 1327 } else { 1328 fixRegionTerminator(CI->getRegionMRT()); 1329 } 1330 } 1331 } 1332 1333 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( 1334 RegionMRT *Region, MachineInstr &PHI, 1335 SmallVector<unsigned, 2> &PHIRegionIndices) { 1336 unsigned NumInputs = getPHINumInputs(PHI); 1337 for (unsigned i = 0; i < NumInputs; ++i) { 1338 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1339 if (Region->contains(Pred)) { 1340 PHIRegionIndices.push_back(i); 1341 } 1342 } 1343 } 1344 1345 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( 1346 LinearizedRegion *Region, MachineInstr &PHI, 1347 SmallVector<unsigned, 2> &PHIRegionIndices) { 1348 unsigned NumInputs = getPHINumInputs(PHI); 1349 for (unsigned i = 0; i < NumInputs; ++i) { 1350 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1351 if (Region->contains(Pred)) { 1352 PHIRegionIndices.push_back(i); 1353 } 1354 } 1355 } 1356 1357 void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices( 1358 LinearizedRegion *Region, MachineInstr &PHI, 1359 SmallVector<unsigned, 2> &PHINonRegionIndices) { 1360 unsigned NumInputs = getPHINumInputs(PHI); 1361 for (unsigned i = 0; i < NumInputs; ++i) { 1362 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1363 if (!Region->contains(Pred)) { 1364 PHINonRegionIndices.push_back(i); 1365 } 1366 } 1367 } 1368 1369 void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest( 1370 unsigned LDestReg, MachineInstr &PHI, 1371 SmallVector<unsigned, 2> *RegionIndices) { 1372 if (RegionIndices) { 1373 for (auto i : *RegionIndices) { 1374 PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); 1375 } 1376 } else { 1377 unsigned NumInputs = getPHINumInputs(PHI); 1378 for (unsigned i = 0; i < NumInputs; ++i) { 1379 PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); 1380 } 1381 } 1382 } 1383 1384 unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo( 1385 MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) { 1386 unsigned DestReg = getPHIDestReg(PHI); 1387 unsigned LinearizeDestReg = 1388 MRI->createVirtualRegister(MRI->getRegClass(DestReg)); 1389 PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc()); 1390 storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices); 1391 return LinearizeDestReg; 1392 } 1393 1394 void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) { 1395 // We need to create a new chain for the killed phi, but there is no 1396 // need to do the renaming outside or inside the block. 1397 SmallPtrSet<MachineInstr *, 2> PHIs; 1398 for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), 1399 E = MBB->instr_end(); 1400 I != E; ++I) { 1401 MachineInstr &Instr = *I; 1402 if (Instr.isPHI()) { 1403 unsigned PHIDestReg = getPHIDestReg(Instr); 1404 DEBUG(dbgs() << "Extractking killed phi:\n"); 1405 DEBUG(Instr.dump()); 1406 PHIs.insert(&Instr); 1407 PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc()); 1408 storePHILinearizationInfoDest(PHIDestReg, Instr); 1409 } 1410 } 1411 1412 for (auto PI : PHIs) { 1413 PI->eraseFromParent(); 1414 } 1415 } 1416 1417 static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices, 1418 unsigned Index) { 1419 for (auto i : PHIRegionIndices) { 1420 if (i == Index) 1421 return true; 1422 } 1423 return false; 1424 } 1425 1426 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, 1427 SmallVector<unsigned, 2> &PHIIndices, 1428 unsigned *ReplaceReg) { 1429 return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg); 1430 } 1431 1432 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, 1433 unsigned CombinedSourceReg, 1434 MachineBasicBlock *SourceMBB, 1435 SmallVector<unsigned, 2> &PHIIndices, 1436 unsigned *ReplaceReg) { 1437 DEBUG(dbgs() << "Shrink PHI: "); 1438 DEBUG(PHI.dump()); 1439 DEBUG(dbgs() << " to " << PrintReg(getPHIDestReg(PHI), TRI) 1440 << "<def> = PHI("); 1441 1442 bool Replaced = false; 1443 unsigned NumInputs = getPHINumInputs(PHI); 1444 int SingleExternalEntryIndex = -1; 1445 for (unsigned i = 0; i < NumInputs; ++i) { 1446 if (!isPHIRegionIndex(PHIIndices, i)) { 1447 if (SingleExternalEntryIndex == -1) { 1448 // Single entry 1449 SingleExternalEntryIndex = i; 1450 } else { 1451 // Multiple entries 1452 SingleExternalEntryIndex = -2; 1453 } 1454 } 1455 } 1456 1457 if (SingleExternalEntryIndex > -1) { 1458 *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex); 1459 // We should not rewrite the code, we should only pick up the single value 1460 // that represents the shrunk PHI. 1461 Replaced = true; 1462 } else { 1463 MachineBasicBlock *MBB = PHI.getParent(); 1464 MachineInstrBuilder MIB = 1465 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1466 getPHIDestReg(PHI)); 1467 if (SourceMBB) { 1468 MIB.addReg(CombinedSourceReg); 1469 MIB.addMBB(SourceMBB); 1470 DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" 1471 << SourceMBB->getNumber()); 1472 } 1473 1474 for (unsigned i = 0; i < NumInputs; ++i) { 1475 if (isPHIRegionIndex(PHIIndices, i)) { 1476 continue; 1477 } 1478 unsigned SourceReg = getPHISourceReg(PHI, i); 1479 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1480 MIB.addReg(SourceReg); 1481 MIB.addMBB(SourcePred); 1482 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 1483 << SourcePred->getNumber()); 1484 } 1485 DEBUG(dbgs() << ")\n"); 1486 } 1487 PHI.eraseFromParent(); 1488 return Replaced; 1489 } 1490 1491 void AMDGPUMachineCFGStructurizer::replacePHI( 1492 MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge, 1493 SmallVector<unsigned, 2> &PHIRegionIndices) { 1494 DEBUG(dbgs() << "Replace PHI: "); 1495 DEBUG(PHI.dump()); 1496 DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI) 1497 << "<def> = PHI("); 1498 1499 bool HasExternalEdge = false; 1500 unsigned NumInputs = getPHINumInputs(PHI); 1501 for (unsigned i = 0; i < NumInputs; ++i) { 1502 if (!isPHIRegionIndex(PHIRegionIndices, i)) { 1503 HasExternalEdge = true; 1504 } 1505 } 1506 1507 if (HasExternalEdge) { 1508 MachineBasicBlock *MBB = PHI.getParent(); 1509 MachineInstrBuilder MIB = 1510 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1511 getPHIDestReg(PHI)); 1512 MIB.addReg(CombinedSourceReg); 1513 MIB.addMBB(LastMerge); 1514 DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" 1515 << LastMerge->getNumber()); 1516 for (unsigned i = 0; i < NumInputs; ++i) { 1517 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1518 continue; 1519 } 1520 unsigned SourceReg = getPHISourceReg(PHI, i); 1521 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1522 MIB.addReg(SourceReg); 1523 MIB.addMBB(SourcePred); 1524 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 1525 << SourcePred->getNumber()); 1526 } 1527 DEBUG(dbgs() << ")\n"); 1528 } else { 1529 replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg); 1530 } 1531 PHI.eraseFromParent(); 1532 } 1533 1534 void AMDGPUMachineCFGStructurizer::replaceEntryPHI( 1535 MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB, 1536 SmallVector<unsigned, 2> &PHIRegionIndices) { 1537 1538 DEBUG(dbgs() << "Replace entry PHI: "); 1539 DEBUG(PHI.dump()); 1540 DEBUG(dbgs() << " with "); 1541 1542 unsigned NumInputs = getPHINumInputs(PHI); 1543 unsigned NumNonRegionInputs = NumInputs; 1544 for (unsigned i = 0; i < NumInputs; ++i) { 1545 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1546 NumNonRegionInputs--; 1547 } 1548 } 1549 1550 if (NumNonRegionInputs == 0) { 1551 auto DestReg = getPHIDestReg(PHI); 1552 replaceRegisterWith(DestReg, CombinedSourceReg); 1553 DEBUG(dbgs() << " register " << PrintReg(CombinedSourceReg, TRI) << "\n"); 1554 PHI.eraseFromParent(); 1555 } else { 1556 DEBUG(dbgs() << PrintReg(getPHIDestReg(PHI), TRI) << "<def> = PHI("); 1557 MachineBasicBlock *MBB = PHI.getParent(); 1558 MachineInstrBuilder MIB = 1559 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1560 getPHIDestReg(PHI)); 1561 MIB.addReg(CombinedSourceReg); 1562 MIB.addMBB(IfMBB); 1563 DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" 1564 << IfMBB->getNumber()); 1565 unsigned NumInputs = getPHINumInputs(PHI); 1566 for (unsigned i = 0; i < NumInputs; ++i) { 1567 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1568 continue; 1569 } 1570 unsigned SourceReg = getPHISourceReg(PHI, i); 1571 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1572 MIB.addReg(SourceReg); 1573 MIB.addMBB(SourcePred); 1574 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 1575 << SourcePred->getNumber()); 1576 } 1577 DEBUG(dbgs() << ")\n"); 1578 PHI.eraseFromParent(); 1579 } 1580 } 1581 1582 void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs( 1583 MachineInstr &PHI, SmallVector<unsigned, 2> &PHIRegionIndices, 1584 unsigned CombinedSourceReg, LinearizedRegion *LRegion) { 1585 bool WasLiveOut = false; 1586 for (auto PII : PHIRegionIndices) { 1587 unsigned Reg = getPHISourceReg(PHI, PII); 1588 if (LRegion->isLiveOut(Reg)) { 1589 bool IsDead = true; 1590 1591 // Check if register is live out of the basic block 1592 MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent(); 1593 for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) { 1594 if ((*UI).getParent()->getParent() != DefMBB) { 1595 IsDead = false; 1596 } 1597 } 1598 1599 DEBUG(dbgs() << "Register " << PrintReg(Reg, TRI) << " is " 1600 << (IsDead ? "dead" : "alive") << " after PHI replace\n"); 1601 if (IsDead) { 1602 LRegion->removeLiveOut(Reg); 1603 } 1604 WasLiveOut = true; 1605 } 1606 } 1607 1608 if (WasLiveOut) 1609 LRegion->addLiveOut(CombinedSourceReg); 1610 } 1611 1612 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region, 1613 MachineBasicBlock *LastMerge, 1614 MachineInstr &PHI, 1615 LinearizedRegion *LRegion) { 1616 SmallVector<unsigned, 2> PHIRegionIndices; 1617 getPHIRegionIndices(Region, PHI, PHIRegionIndices); 1618 unsigned LinearizedSourceReg = 1619 storePHILinearizationInfo(PHI, &PHIRegionIndices); 1620 1621 replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices); 1622 replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion); 1623 } 1624 1625 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region, 1626 MachineBasicBlock *IfMBB, 1627 MachineInstr &PHI) { 1628 SmallVector<unsigned, 2> PHINonRegionIndices; 1629 getPHINonRegionIndices(Region, PHI, PHINonRegionIndices); 1630 unsigned LinearizedSourceReg = 1631 storePHILinearizationInfo(PHI, &PHINonRegionIndices); 1632 replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices); 1633 } 1634 1635 static void collectPHIs(MachineBasicBlock *MBB, 1636 SmallVector<MachineInstr *, 2> &PHIs) { 1637 for (auto &BBI : *MBB) { 1638 if (BBI.isPHI()) { 1639 PHIs.push_back(&BBI); 1640 } 1641 } 1642 } 1643 1644 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region, 1645 MachineBasicBlock *LastMerge, 1646 LinearizedRegion *LRegion) { 1647 SmallVector<MachineInstr *, 2> PHIs; 1648 auto Exit = Region->getSucc(); 1649 if (Exit == nullptr) 1650 return; 1651 1652 collectPHIs(Exit, PHIs); 1653 1654 for (auto PHII : PHIs) { 1655 rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion); 1656 } 1657 } 1658 1659 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region, 1660 MachineBasicBlock *IfMBB) { 1661 SmallVector<MachineInstr *, 2> PHIs; 1662 auto Entry = Region->getEntry(); 1663 1664 collectPHIs(Entry, PHIs); 1665 1666 for (auto PHII : PHIs) { 1667 rewriteRegionEntryPHI(Region, IfMBB, *PHII); 1668 } 1669 } 1670 1671 void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB, 1672 MachineBasicBlock *Dest, 1673 const DebugLoc &DL) { 1674 DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber() 1675 << " -> " << Dest->getNumber() << "\n"); 1676 MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator(); 1677 bool HasTerminator = Terminator != MBB->instr_end(); 1678 if (HasTerminator) { 1679 TII->ReplaceTailWithBranchTo(Terminator, Dest); 1680 } 1681 if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) { 1682 TII->insertUnconditionalBranch(*MBB, Dest, DL); 1683 } 1684 } 1685 1686 static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) { 1687 MachineBasicBlock *result = nullptr; 1688 for (auto &MFI : MF) { 1689 if (MFI.succ_size() == 0) { 1690 if (result == nullptr) { 1691 result = &MFI; 1692 } else { 1693 return nullptr; 1694 } 1695 } 1696 } 1697 1698 return result; 1699 } 1700 1701 static bool hasOneExitNode(MachineFunction &MF) { 1702 return getSingleExitNode(MF) != nullptr; 1703 } 1704 1705 MachineBasicBlock * 1706 AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) { 1707 auto Exit = Region->getSucc(); 1708 1709 // If the exit is the end of the function, we just use the existing 1710 MachineFunction *MF = Region->getEntry()->getParent(); 1711 if (Exit == nullptr && hasOneExitNode(*MF)) { 1712 return &(*(--(Region->getEntry()->getParent()->end()))); 1713 } 1714 1715 MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock(); 1716 if (Exit == nullptr) { 1717 MachineFunction::iterator ExitIter = MF->end(); 1718 MF->insert(ExitIter, LastMerge); 1719 } else { 1720 MachineFunction::iterator ExitIter = Exit->getIterator(); 1721 MF->insert(ExitIter, LastMerge); 1722 LastMerge->addSuccessor(Exit); 1723 insertUnconditionalBranch(LastMerge, Exit); 1724 DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n"); 1725 } 1726 return LastMerge; 1727 } 1728 1729 void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB, 1730 MachineBasicBlock *CodeBB, 1731 MachineBasicBlock *MergeBB, 1732 unsigned DestRegister, 1733 unsigned IfSourceRegister, 1734 unsigned CodeSourceRegister, 1735 bool IsUndefIfSource) { 1736 // If this is the function exit block, we don't need a phi. 1737 if (MergeBB->succ_begin() == MergeBB->succ_end()) { 1738 return; 1739 } 1740 DEBUG(dbgs() << "Merge PHI (BB#" << MergeBB->getNumber() 1741 << "): " << PrintReg(DestRegister, TRI) << "<def> = PHI(" 1742 << PrintReg(IfSourceRegister, TRI) << ", BB#" 1743 << IfBB->getNumber() << PrintReg(CodeSourceRegister, TRI) 1744 << ", BB#" << CodeBB->getNumber() << ")\n"); 1745 const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin()); 1746 MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL, 1747 TII->get(TargetOpcode::PHI), DestRegister); 1748 if (IsUndefIfSource && false) { 1749 MIB.addReg(IfSourceRegister, RegState::Undef); 1750 } else { 1751 MIB.addReg(IfSourceRegister); 1752 } 1753 MIB.addMBB(IfBB); 1754 MIB.addReg(CodeSourceRegister); 1755 MIB.addMBB(CodeBB); 1756 } 1757 1758 static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) { 1759 for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), 1760 E = MBB->succ_end(); 1761 PI != E; ++PI) { 1762 if ((*PI) != MBB) { 1763 (MBB)->removeSuccessor(*PI); 1764 } 1765 } 1766 } 1767 1768 static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, 1769 MachineBasicBlock *EndMBB) { 1770 1771 // We have to check against the StartMBB successor becasuse a 1772 // structurized region with a loop will have the entry block split, 1773 // and the backedge will go to the entry successor. 1774 DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs; 1775 unsigned SuccSize = StartMBB->succ_size(); 1776 if (SuccSize > 0) { 1777 MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin()); 1778 for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(), 1779 E = EndMBB->succ_end(); 1780 PI != E; ++PI) { 1781 // Either we have a back-edge to the entry block, or a back-edge to the 1782 // succesor of the entry block since the block may be split. 1783 if ((*PI) != StartMBB && 1784 !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) { 1785 Succs.insert( 1786 std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI)); 1787 } 1788 } 1789 } 1790 1791 for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(), 1792 E = StartMBB->pred_end(); 1793 PI != E; ++PI) { 1794 if ((*PI) != EndMBB) { 1795 Succs.insert( 1796 std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB)); 1797 } 1798 } 1799 1800 for (auto SI : Succs) { 1801 std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI; 1802 DEBUG(dbgs() << "Removing edge: BB#" << Edge.first->getNumber() << " -> BB#" 1803 << Edge.second->getNumber() << "\n"); 1804 Edge.first->removeSuccessor(Edge.second); 1805 } 1806 } 1807 1808 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock( 1809 MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart, 1810 MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg, 1811 bool InheritPreds) { 1812 MachineFunction *MF = MergeBB->getParent(); 1813 MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock(); 1814 1815 if (InheritPreds) { 1816 for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(), 1817 E = CodeBBStart->pred_end(); 1818 PI != E; ++PI) { 1819 if ((*PI) != CodeBBEnd) { 1820 MachineBasicBlock *Pred = (*PI); 1821 Pred->addSuccessor(IfBB); 1822 } 1823 } 1824 } 1825 1826 removeExternalCFGEdges(CodeBBStart, CodeBBEnd); 1827 1828 auto CodeBBStartI = CodeBBStart->getIterator(); 1829 auto CodeBBEndI = CodeBBEnd->getIterator(); 1830 auto MergeIter = MergeBB->getIterator(); 1831 MF->insert(MergeIter, IfBB); 1832 MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI); 1833 IfBB->addSuccessor(MergeBB); 1834 IfBB->addSuccessor(CodeBBStart); 1835 1836 DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n"); 1837 // Ensure that the MergeBB is a succesor of the CodeEndBB. 1838 if (!CodeBBEnd->isSuccessor(MergeBB)) 1839 CodeBBEnd->addSuccessor(MergeBB); 1840 1841 DEBUG(dbgs() << "Moved MBB#" << CodeBBStart->getNumber() << " through MBB#" 1842 << CodeBBEnd->getNumber() << "\n"); 1843 1844 // If we have a single predecessor we can find a reasonable debug location 1845 MachineBasicBlock *SinglePred = 1846 CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr; 1847 const DebugLoc &DL = SinglePred 1848 ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator()) 1849 : DebugLoc(); 1850 1851 unsigned Reg = 1852 TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg, 1853 SelectBB->getNumber() /* CodeBBStart->getNumber() */); 1854 if (&(*(IfBB->getParent()->begin())) == IfBB) { 1855 TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg, 1856 CodeBBStart->getNumber()); 1857 } 1858 MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); 1859 ArrayRef<MachineOperand> Cond(RegOp); 1860 TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL); 1861 1862 return IfBB; 1863 } 1864 1865 void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled( 1866 SmallVector<MachineOperand, 1> Cond) { 1867 if (Cond.size() != 1) 1868 return; 1869 if (!Cond[0].isReg()) 1870 return; 1871 1872 unsigned CondReg = Cond[0].getReg(); 1873 for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) { 1874 (*UI).setIsKill(false); 1875 } 1876 } 1877 1878 void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, 1879 MachineBasicBlock *MergeBB, 1880 unsigned BBSelectReg) { 1881 MachineBasicBlock *TrueBB = nullptr; 1882 MachineBasicBlock *FalseBB = nullptr; 1883 SmallVector<MachineOperand, 1> Cond; 1884 MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB]; 1885 TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond); 1886 1887 const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator()); 1888 1889 if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) { 1890 // This is an exit block, hence no successors. We will assign the 1891 // bb select register to the entry block. 1892 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1893 BBSelectReg, 1894 CodeBB->getParent()->begin()->getNumber()); 1895 insertUnconditionalBranch(CodeBB, MergeBB, DL); 1896 return; 1897 } 1898 1899 if (FalseBB == nullptr && TrueBB == nullptr) { 1900 TrueBB = FallthroughBB; 1901 } else if (TrueBB != nullptr) { 1902 FalseBB = 1903 (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB; 1904 } 1905 1906 if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) { 1907 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1908 BBSelectReg, TrueBB->getNumber()); 1909 } else { 1910 const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg); 1911 unsigned TrueBBReg = MRI->createVirtualRegister(RegClass); 1912 unsigned FalseBBReg = MRI->createVirtualRegister(RegClass); 1913 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1914 TrueBBReg, TrueBB->getNumber()); 1915 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1916 FalseBBReg, FalseBB->getNumber()); 1917 ensureCondIsNotKilled(Cond); 1918 TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL, 1919 BBSelectReg, Cond, TrueBBReg, FalseBBReg); 1920 } 1921 1922 insertUnconditionalBranch(CodeBB, MergeBB, DL); 1923 } 1924 1925 MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) { 1926 if (MRI->def_begin(Reg) == MRI->def_end()) { 1927 DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1928 << " has NO defs\n"); 1929 } else if (!MRI->hasOneDef(Reg)) { 1930 DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1931 << " has multiple defs\n"); 1932 DEBUG(dbgs() << "DEFS BEGIN:\n"); 1933 for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { 1934 DEBUG(DI->getParent()->dump()); 1935 } 1936 DEBUG(dbgs() << "DEFS END\n"); 1937 } 1938 1939 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); 1940 return (*(MRI->def_begin(Reg))).getParent(); 1941 } 1942 1943 void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, 1944 MachineBasicBlock *CodeBB, 1945 MachineBasicBlock *MergeBB, 1946 LinearizedRegion *InnerRegion, 1947 unsigned DestReg, 1948 unsigned SourceReg) { 1949 // In this function we know we are part of a chain already, so we need 1950 // to add the registers to the existing chain, and rename the register 1951 // inside the region. 1952 bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); 1953 MachineInstr *DefInstr = getDefInstr(SourceReg); 1954 if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) { 1955 // Handle the case where the def is a PHI-def inside a basic 1956 // block, then we only need to do renaming. Special care needs to 1957 // be taken if the PHI-def is part of an existing chain, or if a 1958 // new one needs to be created. 1959 InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI); 1960 1961 // We collect all PHI Information, and if we are at the region entry, 1962 // all PHIs will be removed, and then re-introduced if needed. 1963 storePHILinearizationInfoDest(DestReg, *DefInstr); 1964 // We have picked up all the information we need now and can remove 1965 // the PHI 1966 PHIInfo.removeSource(DestReg, SourceReg, CodeBB); 1967 DefInstr->eraseFromParent(); 1968 } else { 1969 // If this is not a phi-def, or it is a phi-def but from a linearized region 1970 if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) { 1971 // If this is a single BB and the definition is in this block we 1972 // need to replace any uses outside the region. 1973 InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI); 1974 } 1975 const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg); 1976 unsigned NextDestReg = MRI->createVirtualRegister(RegClass); 1977 bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1; 1978 DEBUG(dbgs() << "Insert Chained PHI\n"); 1979 insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg, 1980 SourceReg, IsLastDef); 1981 1982 PHIInfo.removeSource(DestReg, SourceReg, CodeBB); 1983 if (IsLastDef) { 1984 const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator()); 1985 TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL, 1986 NextDestReg, 0); 1987 PHIInfo.deleteDef(DestReg); 1988 } else { 1989 PHIInfo.replaceDef(DestReg, NextDestReg); 1990 } 1991 } 1992 } 1993 1994 bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB, 1995 LinearizedRegion *InnerRegion, 1996 unsigned Register) { 1997 return getDefInstr(Register)->getParent() == MBB || 1998 InnerRegion->contains(getDefInstr(Register)->getParent()); 1999 } 2000 2001 void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, 2002 MachineBasicBlock *CodeBB, 2003 MachineBasicBlock *MergeBB, 2004 LinearizedRegion *InnerRegion, 2005 LinearizedRegion *LRegion) { 2006 DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts(); 2007 SmallVector<unsigned, 4> OldLiveOuts; 2008 bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); 2009 for (auto OLI : *LiveOuts) { 2010 OldLiveOuts.push_back(OLI); 2011 } 2012 2013 for (auto LI : OldLiveOuts) { 2014 DEBUG(dbgs() << "LiveOut: " << PrintReg(LI, TRI)); 2015 if (!containsDef(CodeBB, InnerRegion, LI) || 2016 (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) { 2017 // If the register simly lives through the CodeBB, we don't have 2018 // to rewrite anything since the register is not defined in this 2019 // part of the code. 2020 DEBUG(dbgs() << "- through"); 2021 continue; 2022 } 2023 DEBUG(dbgs() << "\n"); 2024 unsigned Reg = LI; 2025 if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) { 2026 // If the register is live out, we do want to create a phi, 2027 // unless it is from the Exit block, becasuse in that case there 2028 // is already a PHI, and no need to create a new one. 2029 2030 // If the register is just a live out def and not part of a phi 2031 // chain, we need to create a PHI node to handle the if region, 2032 // and replace all uses outside of the region with the new dest 2033 // register, unless it is the outgoing BB select register. We have 2034 // already creaed phi nodes for these. 2035 const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); 2036 unsigned PHIDestReg = MRI->createVirtualRegister(RegClass); 2037 unsigned IfSourceReg = MRI->createVirtualRegister(RegClass); 2038 // Create initializer, this value is never used, but is needed 2039 // to satisfy SSA. 2040 DEBUG(dbgs() << "Initializer for reg: " << PrintReg(Reg) << "\n"); 2041 TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(), 2042 IfSourceReg, 0); 2043 2044 InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI); 2045 DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n"); 2046 insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg, 2047 IfSourceReg, Reg, true); 2048 } 2049 } 2050 2051 // Handle the chained definitions in PHIInfo, checking if this basic block 2052 // is a source block for a definition. 2053 SmallVector<unsigned, 4> Sources; 2054 if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) { 2055 DEBUG(dbgs() << "Inserting PHI Live Out from BB#" << CodeBB->getNumber() 2056 << "\n"); 2057 for (auto SI : Sources) { 2058 unsigned DestReg; 2059 PHIInfo.findDest(SI, CodeBB, DestReg); 2060 insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI); 2061 } 2062 DEBUG(dbgs() << "Insertion done.\n"); 2063 } 2064 2065 DEBUG(PHIInfo.dump(MRI)); 2066 } 2067 2068 void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) { 2069 DEBUG(dbgs() << "Before PHI Prune\n"); 2070 DEBUG(PHIInfo.dump(MRI)); 2071 SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4> 2072 ElimiatedSources; 2073 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2074 ++DRI) { 2075 2076 unsigned DestReg = *DRI; 2077 auto SE = PHIInfo.sources_end(DestReg); 2078 2079 bool MBBContainsPHISource = false; 2080 // Check if there is a PHI source in this MBB 2081 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2082 unsigned SourceReg = (*SRI).first; 2083 MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); 2084 if (Def->getParent()->getParent() == MBB) { 2085 MBBContainsPHISource = true; 2086 } 2087 } 2088 2089 // If so, all other sources are useless since we know this block 2090 // is always executed when the region is executed. 2091 if (MBBContainsPHISource) { 2092 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2093 PHILinearize::PHISourceT Source = *SRI; 2094 unsigned SourceReg = Source.first; 2095 MachineBasicBlock *SourceMBB = Source.second; 2096 MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); 2097 if (Def->getParent()->getParent() != MBB) { 2098 ElimiatedSources.push_back( 2099 std::make_tuple(DestReg, SourceReg, SourceMBB)); 2100 } 2101 } 2102 } 2103 } 2104 2105 // Remove the PHI sources that are in the given MBB 2106 for (auto &SourceInfo : ElimiatedSources) { 2107 PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo), 2108 std::get<2>(SourceInfo)); 2109 } 2110 DEBUG(dbgs() << "After PHI Prune\n"); 2111 DEBUG(PHIInfo.dump(MRI)); 2112 } 2113 2114 void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion, 2115 unsigned DestReg) { 2116 MachineBasicBlock *Entry = CurrentRegion->getEntry(); 2117 MachineBasicBlock *Exit = CurrentRegion->getExit(); 2118 2119 DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() 2120 << " Pred: " << (*(Entry->pred_begin()))->getNumber() << "\n"); 2121 2122 int NumSources = 0; 2123 auto SE = PHIInfo.sources_end(DestReg); 2124 2125 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2126 NumSources++; 2127 } 2128 2129 if (NumSources == 1) { 2130 auto SRI = PHIInfo.sources_begin(DestReg); 2131 unsigned SourceReg = (*SRI).first; 2132 replaceRegisterWith(DestReg, SourceReg); 2133 } else { 2134 const DebugLoc &DL = Entry->findDebugLoc(Entry->begin()); 2135 MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL, 2136 TII->get(TargetOpcode::PHI), DestReg); 2137 DEBUG(dbgs() << "Entry PHI " << PrintReg(DestReg, TRI) << "<def> = PHI("); 2138 2139 unsigned CurrentBackedgeReg = 0; 2140 2141 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2142 unsigned SourceReg = (*SRI).first; 2143 2144 if (CurrentRegion->contains((*SRI).second)) { 2145 if (CurrentBackedgeReg == 0) { 2146 CurrentBackedgeReg = SourceReg; 2147 } else { 2148 MachineInstr *PHIDefInstr = getDefInstr(SourceReg); 2149 MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent(); 2150 const TargetRegisterClass *RegClass = 2151 MRI->getRegClass(CurrentBackedgeReg); 2152 unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass); 2153 MachineInstrBuilder BackedgePHI = 2154 BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL, 2155 TII->get(TargetOpcode::PHI), NewBackedgeReg); 2156 BackedgePHI.addReg(CurrentBackedgeReg); 2157 BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0)); 2158 BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1)); 2159 BackedgePHI.addMBB((*SRI).second); 2160 CurrentBackedgeReg = NewBackedgeReg; 2161 DEBUG(dbgs() << "Inserting backedge PHI: " 2162 << PrintReg(NewBackedgeReg, TRI) << "<def> = PHI(" 2163 << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" 2164 << getPHIPred(*PHIDefInstr, 0)->getNumber() << ", " 2165 << PrintReg(getPHISourceReg(*PHIDefInstr, 1), TRI) 2166 << ", BB#" << (*SRI).second->getNumber()); 2167 } 2168 } else { 2169 MIB.addReg(SourceReg); 2170 MIB.addMBB((*SRI).second); 2171 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 2172 << (*SRI).second->getNumber() << ", "); 2173 } 2174 } 2175 2176 // Add the final backedge register source to the entry phi 2177 if (CurrentBackedgeReg != 0) { 2178 MIB.addReg(CurrentBackedgeReg); 2179 MIB.addMBB(Exit); 2180 DEBUG(dbgs() << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" 2181 << Exit->getNumber() << ")\n"); 2182 } else { 2183 DEBUG(dbgs() << ")\n"); 2184 } 2185 } 2186 } 2187 2188 void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) { 2189 DEBUG(PHIInfo.dump(MRI)); 2190 2191 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2192 ++DRI) { 2193 2194 unsigned DestReg = *DRI; 2195 createEntryPHI(CurrentRegion, DestReg); 2196 } 2197 PHIInfo.clear(); 2198 } 2199 2200 void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register, 2201 unsigned NewRegister) { 2202 assert(Register != NewRegister && "Cannot replace a reg with itself"); 2203 2204 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), 2205 E = MRI->reg_end(); 2206 I != E;) { 2207 MachineOperand &O = *I; 2208 ++I; 2209 if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { 2210 DEBUG(dbgs() << "Trying to substitute physical register: " 2211 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 2212 << "\n"); 2213 llvm_unreachable("Cannot substitute physical registers"); 2214 // We don't handle physical registers, but if we need to 2215 // in the future This is how we do it: 2216 // O.substPhysReg(NewRegister, *TRI); 2217 } else { 2218 DEBUG(dbgs() << "Replacing register: " 2219 << PrintReg(Register, MRI->getTargetRegisterInfo()) 2220 << " with " 2221 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 2222 << "\n"); 2223 O.setReg(NewRegister); 2224 } 2225 } 2226 PHIInfo.deleteDef(Register); 2227 2228 getRegionMRT()->replaceLiveOutReg(Register, NewRegister); 2229 2230 DEBUG(PHIInfo.dump(MRI)); 2231 } 2232 2233 void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) { 2234 DEBUG(dbgs() << "Resolve PHI Infos\n"); 2235 DEBUG(PHIInfo.dump(MRI)); 2236 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2237 ++DRI) { 2238 unsigned DestReg = *DRI; 2239 DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) << "\n"); 2240 auto SRI = PHIInfo.sources_begin(DestReg); 2241 unsigned SourceReg = (*SRI).first; 2242 DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) 2243 << " SourceReg: " << PrintReg(SourceReg, TRI) << "\n"); 2244 2245 assert(PHIInfo.sources_end(DestReg) == ++SRI && 2246 "More than one phi source in entry node"); 2247 replaceRegisterWith(DestReg, SourceReg); 2248 } 2249 } 2250 2251 static bool isFunctionEntryBlock(MachineBasicBlock *MBB) { 2252 return ((&(*(MBB->getParent()->begin()))) == MBB); 2253 } 2254 2255 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( 2256 MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB, 2257 LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn, 2258 unsigned BBSelectRegOut) { 2259 if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) { 2260 // Handle non-loop function entry block. 2261 // We need to allow loops to the entry block and then 2262 rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); 2263 resolvePHIInfos(CodeBB); 2264 removeExternalCFGSuccessors(CodeBB); 2265 CodeBB->addSuccessor(MergeBB); 2266 CurrentRegion->addMBB(CodeBB); 2267 return nullptr; 2268 } 2269 if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) { 2270 // Handle non-loop region entry block. 2271 MachineFunction *MF = MergeBB->getParent(); 2272 auto MergeIter = MergeBB->getIterator(); 2273 auto CodeBBStartIter = CodeBB->getIterator(); 2274 auto CodeBBEndIter = ++(CodeBB->getIterator()); 2275 if (CodeBBEndIter != MergeIter) { 2276 MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter); 2277 } 2278 rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); 2279 prunePHIInfo(CodeBB); 2280 createEntryPHIs(CurrentRegion); 2281 removeExternalCFGSuccessors(CodeBB); 2282 CodeBB->addSuccessor(MergeBB); 2283 CurrentRegion->addMBB(CodeBB); 2284 return nullptr; 2285 } else { 2286 // Handle internal block. 2287 const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn); 2288 unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass); 2289 rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg); 2290 bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB; 2291 MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB, 2292 BBSelectRegIn, IsRegionEntryBB); 2293 CurrentRegion->addMBB(IfBB); 2294 // If this is the entry block we need to make the If block the new 2295 // linearized region entry. 2296 if (IsRegionEntryBB) { 2297 CurrentRegion->setEntry(IfBB); 2298 2299 if (CurrentRegion->getHasLoop()) { 2300 MachineBasicBlock *RegionExit = CurrentRegion->getExit(); 2301 MachineBasicBlock *ETrueBB = nullptr; 2302 MachineBasicBlock *EFalseBB = nullptr; 2303 SmallVector<MachineOperand, 1> ECond; 2304 2305 const DebugLoc &DL = DebugLoc(); 2306 TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); 2307 TII->removeBranch(*RegionExit); 2308 2309 // We need to create a backedge if there is a loop 2310 unsigned Reg = TII->insertNE( 2311 RegionExit, RegionExit->instr_end(), DL, 2312 CurrentRegion->getRegionMRT()->getInnerOutputRegister(), 2313 CurrentRegion->getRegionMRT()->getEntry()->getNumber()); 2314 MachineOperand RegOp = 2315 MachineOperand::CreateReg(Reg, false, false, true); 2316 ArrayRef<MachineOperand> Cond(RegOp); 2317 DEBUG(dbgs() << "RegionExitReg: "); 2318 DEBUG(Cond[0].print(dbgs(), TRI)); 2319 DEBUG(dbgs() << "\n"); 2320 TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, 2321 Cond, DebugLoc()); 2322 RegionExit->addSuccessor(CurrentRegion->getEntry()); 2323 } 2324 } 2325 CurrentRegion->addMBB(CodeBB); 2326 LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo); 2327 2328 InnerRegion.setParent(CurrentRegion); 2329 DEBUG(dbgs() << "Insert BB Select PHI (BB)\n"); 2330 insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn, 2331 CodeBBSelectReg); 2332 InnerRegion.addMBB(MergeBB); 2333 2334 DEBUG(InnerRegion.print(dbgs(), TRI)); 2335 rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion); 2336 extractKilledPHIs(CodeBB); 2337 if (IsRegionEntryBB) { 2338 createEntryPHIs(CurrentRegion); 2339 } 2340 return IfBB; 2341 } 2342 } 2343 2344 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( 2345 MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion, 2346 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, 2347 unsigned BBSelectRegIn, unsigned BBSelectRegOut) { 2348 unsigned CodeBBSelectReg = 2349 InnerRegion->getRegionMRT()->getInnerOutputRegister(); 2350 MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry(); 2351 MachineBasicBlock *CodeExitBB = InnerRegion->getExit(); 2352 MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB, 2353 SelectBB, BBSelectRegIn, true); 2354 CurrentRegion->addMBB(IfBB); 2355 bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry(); 2356 if (isEntry) { 2357 2358 if (CurrentRegion->getHasLoop()) { 2359 MachineBasicBlock *RegionExit = CurrentRegion->getExit(); 2360 MachineBasicBlock *ETrueBB = nullptr; 2361 MachineBasicBlock *EFalseBB = nullptr; 2362 SmallVector<MachineOperand, 1> ECond; 2363 2364 const DebugLoc &DL = DebugLoc(); 2365 TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); 2366 TII->removeBranch(*RegionExit); 2367 2368 // We need to create a backedge if there is a loop 2369 unsigned Reg = 2370 TII->insertNE(RegionExit, RegionExit->instr_end(), DL, 2371 CurrentRegion->getRegionMRT()->getInnerOutputRegister(), 2372 CurrentRegion->getRegionMRT()->getEntry()->getNumber()); 2373 MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); 2374 ArrayRef<MachineOperand> Cond(RegOp); 2375 DEBUG(dbgs() << "RegionExitReg: "); 2376 DEBUG(Cond[0].print(dbgs(), TRI)); 2377 DEBUG(dbgs() << "\n"); 2378 TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, 2379 Cond, DebugLoc()); 2380 RegionExit->addSuccessor(IfBB); 2381 } 2382 } 2383 CurrentRegion->addMBBs(InnerRegion); 2384 DEBUG(dbgs() << "Insert BB Select PHI (region)\n"); 2385 insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn, 2386 CodeBBSelectReg); 2387 2388 rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion, 2389 CurrentRegion); 2390 2391 rewriteRegionEntryPHIs(InnerRegion, IfBB); 2392 2393 if (isEntry) { 2394 CurrentRegion->setEntry(IfBB); 2395 } 2396 2397 if (isEntry) { 2398 createEntryPHIs(CurrentRegion); 2399 } 2400 2401 return IfBB; 2402 } 2403 2404 void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, 2405 MachineBasicBlock *Entry, 2406 MachineBasicBlock *EntrySucc, 2407 LinearizedRegion *LRegion) { 2408 SmallVector<unsigned, 2> PHIRegionIndices; 2409 getPHIRegionIndices(LRegion, PHI, PHIRegionIndices); 2410 2411 assert(PHIRegionIndices.size() == 1); 2412 2413 unsigned RegionIndex = PHIRegionIndices[0]; 2414 unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex); 2415 MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex); 2416 unsigned PHIDest = getPHIDestReg(PHI); 2417 unsigned PHISource = PHIDest; 2418 unsigned ReplaceReg; 2419 2420 if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) { 2421 PHISource = ReplaceReg; 2422 } 2423 2424 const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest); 2425 unsigned NewDestReg = MRI->createVirtualRegister(RegClass); 2426 LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI); 2427 MachineInstrBuilder MIB = 2428 BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), 2429 TII->get(TargetOpcode::PHI), NewDestReg); 2430 DEBUG(dbgs() << "Split Entry PHI " << PrintReg(NewDestReg, TRI) 2431 << "<def> = PHI("); 2432 MIB.addReg(PHISource); 2433 MIB.addMBB(Entry); 2434 DEBUG(dbgs() << PrintReg(PHISource, TRI) << ", BB#" << Entry->getNumber()); 2435 MIB.addReg(RegionSourceReg); 2436 MIB.addMBB(RegionSourceMBB); 2437 DEBUG(dbgs() << " ," << PrintReg(RegionSourceReg, TRI) << ", BB#" 2438 << RegionSourceMBB->getNumber() << ")\n"); 2439 } 2440 2441 void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry, 2442 MachineBasicBlock *EntrySucc, 2443 LinearizedRegion *LRegion) { 2444 SmallVector<MachineInstr *, 2> PHIs; 2445 collectPHIs(Entry, PHIs); 2446 2447 for (auto PHII : PHIs) { 2448 splitLoopPHI(*PHII, Entry, EntrySucc, LRegion); 2449 } 2450 } 2451 2452 // Split the exit block so that we can insert a end control flow 2453 MachineBasicBlock * 2454 AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) { 2455 auto MRTRegion = LRegion->getRegionMRT(); 2456 auto Exit = LRegion->getExit(); 2457 auto MF = Exit->getParent(); 2458 auto Succ = MRTRegion->getSucc(); 2459 2460 auto NewExit = MF->CreateMachineBasicBlock(); 2461 auto AfterExitIter = Exit->getIterator(); 2462 AfterExitIter++; 2463 MF->insert(AfterExitIter, NewExit); 2464 Exit->removeSuccessor(Succ); 2465 Exit->addSuccessor(NewExit); 2466 NewExit->addSuccessor(Succ); 2467 insertUnconditionalBranch(NewExit, Succ); 2468 LRegion->addMBB(NewExit); 2469 LRegion->setExit(NewExit); 2470 2471 DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() << "\n"); 2472 2473 // Replace any PHI Predecessors in the successor with NewExit 2474 for (auto &II : *Succ) { 2475 MachineInstr &Instr = II; 2476 2477 // If we are past the PHI instructions we are done 2478 if (!Instr.isPHI()) 2479 break; 2480 2481 int numPreds = getPHINumInputs(Instr); 2482 for (int i = 0; i < numPreds; ++i) { 2483 auto Pred = getPHIPred(Instr, i); 2484 if (Pred == Exit) { 2485 setPhiPred(Instr, i, NewExit); 2486 } 2487 } 2488 } 2489 2490 return NewExit; 2491 } 2492 2493 2494 static MachineBasicBlock *split(MachineBasicBlock::iterator I) { 2495 // Create the fall-through block. 2496 MachineBasicBlock *MBB = (*I).getParent(); 2497 MachineFunction *MF = MBB->getParent(); 2498 MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock(); 2499 auto MBBIter = ++(MBB->getIterator()); 2500 MF->insert(MBBIter, SuccMBB); 2501 SuccMBB->transferSuccessorsAndUpdatePHIs(MBB); 2502 MBB->addSuccessor(SuccMBB); 2503 2504 // Splice the code over. 2505 SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end()); 2506 2507 return SuccMBB; 2508 } 2509 2510 // Split the entry block separating PHI-nodes and the rest of the code 2511 // This is needed to insert an initializer for the bb select register 2512 // inloop regions. 2513 2514 MachineBasicBlock * 2515 AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) { 2516 MachineBasicBlock *Entry = LRegion->getEntry(); 2517 MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI()); 2518 MachineBasicBlock *Exit = LRegion->getExit(); 2519 2520 DEBUG(dbgs() << "Split BB#" << Entry->getNumber() << " to BB#" 2521 << Entry->getNumber() << " -> BB#" << EntrySucc->getNumber() 2522 << "\n"); 2523 LRegion->addMBB(EntrySucc); 2524 2525 // Make the backedge go to Entry Succ 2526 if (Exit->isSuccessor(Entry)) { 2527 Exit->removeSuccessor(Entry); 2528 } 2529 Exit->addSuccessor(EntrySucc); 2530 MachineInstr &Branch = *(Exit->instr_rbegin()); 2531 for (auto &UI : Branch.uses()) { 2532 if (UI.isMBB() && UI.getMBB() == Entry) { 2533 UI.setMBB(EntrySucc); 2534 } 2535 } 2536 2537 splitLoopPHIs(Entry, EntrySucc, LRegion); 2538 2539 return EntrySucc; 2540 } 2541 2542 LinearizedRegion * 2543 AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) { 2544 LinearizedRegion *LRegion = Region->getLinearizedRegion(); 2545 LRegion->initLiveOut(Region, MRI, TRI, PHIInfo); 2546 LRegion->setEntry(Region->getEntry()); 2547 return LRegion; 2548 } 2549 2550 static void removeOldExitPreds(RegionMRT *Region) { 2551 MachineBasicBlock *Exit = Region->getSucc(); 2552 if (Exit == nullptr) { 2553 return; 2554 } 2555 for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(), 2556 E = Exit->pred_end(); 2557 PI != E; ++PI) { 2558 if (Region->contains(*PI)) { 2559 (*PI)->removeSuccessor(Exit); 2560 } 2561 } 2562 } 2563 2564 static bool mbbHasBackEdge(MachineBasicBlock *MBB, 2565 SmallPtrSet<MachineBasicBlock *, 8> &MBBs) { 2566 for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { 2567 if (MBBs.count(*SI) != 0) { 2568 return true; 2569 } 2570 } 2571 return false; 2572 } 2573 2574 static bool containsNewBackedge(MRT *Tree, 2575 SmallPtrSet<MachineBasicBlock *, 8> &MBBs) { 2576 // Need to traverse this in reverse since it is in post order. 2577 if (Tree == nullptr) 2578 return false; 2579 2580 if (Tree->isMBB()) { 2581 MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB(); 2582 MBBs.insert(MBB); 2583 if (mbbHasBackEdge(MBB, MBBs)) { 2584 return true; 2585 } 2586 } else { 2587 RegionMRT *Region = Tree->getRegionMRT(); 2588 SetVector<MRT *> *Children = Region->getChildren(); 2589 for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) { 2590 if (containsNewBackedge(*CI, MBBs)) 2591 return true; 2592 } 2593 } 2594 return false; 2595 } 2596 2597 static bool containsNewBackedge(RegionMRT *Region) { 2598 SmallPtrSet<MachineBasicBlock *, 8> MBBs; 2599 return containsNewBackedge(Region, MBBs); 2600 } 2601 2602 bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { 2603 auto *LRegion = initLinearizedRegion(Region); 2604 LRegion->setHasLoop(containsNewBackedge(Region)); 2605 MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region); 2606 MachineBasicBlock *CurrentMerge = LastMerge; 2607 LRegion->addMBB(LastMerge); 2608 LRegion->setExit(LastMerge); 2609 2610 rewriteRegionExitPHIs(Region, LastMerge, LRegion); 2611 removeOldExitPreds(Region); 2612 2613 DEBUG(PHIInfo.dump(MRI)); 2614 2615 SetVector<MRT *> *Children = Region->getChildren(); 2616 DEBUG(dbgs() << "===========If Region Start===============\n"); 2617 if (LRegion->getHasLoop()) { 2618 DEBUG(dbgs() << "Has Backedge: Yes\n"); 2619 } else { 2620 DEBUG(dbgs() << "Has Backedge: No\n"); 2621 } 2622 2623 unsigned BBSelectRegIn; 2624 unsigned BBSelectRegOut; 2625 for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) { 2626 DEBUG(dbgs() << "CurrentRegion: \n"); 2627 DEBUG(LRegion->print(dbgs(), TRI)); 2628 2629 auto CNI = CI; 2630 ++CNI; 2631 2632 MRT *Child = (*CI); 2633 2634 if (Child->isRegion()) { 2635 2636 LinearizedRegion *InnerLRegion = 2637 Child->getRegionMRT()->getLinearizedRegion(); 2638 // We found the block is the exit of an inner region, we need 2639 // to put it in the current linearized region. 2640 2641 DEBUG(dbgs() << "Linearizing region: "); 2642 DEBUG(InnerLRegion->print(dbgs(), TRI)); 2643 DEBUG(dbgs() << "\n"); 2644 2645 MachineBasicBlock *InnerEntry = InnerLRegion->getEntry(); 2646 if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) { 2647 // Entry has already been linearized, no need to do this region. 2648 unsigned OuterSelect = InnerLRegion->getBBSelectRegOut(); 2649 unsigned InnerSelectReg = 2650 InnerLRegion->getRegionMRT()->getInnerOutputRegister(); 2651 replaceRegisterWith(InnerSelectReg, OuterSelect), 2652 resolvePHIInfos(InnerEntry); 2653 if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge)) 2654 InnerLRegion->getExit()->addSuccessor(CurrentMerge); 2655 continue; 2656 } 2657 2658 BBSelectRegOut = Child->getBBSelectRegOut(); 2659 BBSelectRegIn = Child->getBBSelectRegIn(); 2660 2661 DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) 2662 << "\n"); 2663 DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) 2664 << "\n"); 2665 2666 MachineBasicBlock *IfEnd = CurrentMerge; 2667 CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion, 2668 Child->getRegionMRT()->getEntry(), 2669 BBSelectRegIn, BBSelectRegOut); 2670 TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); 2671 } else { 2672 MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB(); 2673 DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n"); 2674 2675 if (MBB == getSingleExitNode(*(MBB->getParent()))) { 2676 // If this is the exit block then we need to skip to the next. 2677 // The "in" register will be transferred to "out" in the next 2678 // iteration. 2679 continue; 2680 } 2681 2682 BBSelectRegOut = Child->getBBSelectRegOut(); 2683 BBSelectRegIn = Child->getBBSelectRegIn(); 2684 2685 DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) 2686 << "\n"); 2687 DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) 2688 << "\n"); 2689 2690 MachineBasicBlock *IfEnd = CurrentMerge; 2691 // This is a basic block that is not part of an inner region, we 2692 // need to put it in the current linearized region. 2693 CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn, 2694 BBSelectRegOut); 2695 if (CurrentMerge) { 2696 TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); 2697 } 2698 2699 DEBUG(PHIInfo.dump(MRI)); 2700 } 2701 } 2702 2703 LRegion->removeFalseRegisterKills(MRI); 2704 2705 if (LRegion->getHasLoop()) { 2706 MachineBasicBlock *NewSucc = splitEntry(LRegion); 2707 if (isFunctionEntryBlock(LRegion->getEntry())) { 2708 resolvePHIInfos(LRegion->getEntry()); 2709 } 2710 const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI()); 2711 unsigned InReg = LRegion->getBBSelectRegIn(); 2712 unsigned InnerSelectReg = 2713 MRI->createVirtualRegister(MRI->getRegClass(InReg)); 2714 unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); 2715 TII->materializeImmediate(*(LRegion->getEntry()), 2716 LRegion->getEntry()->getFirstTerminator(), DL, 2717 NewInReg, Region->getEntry()->getNumber()); 2718 // Need to be careful about updating the registers inside the region. 2719 LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI); 2720 DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n"); 2721 insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc, 2722 InnerSelectReg, NewInReg, 2723 LRegion->getRegionMRT()->getInnerOutputRegister()); 2724 splitExit(LRegion); 2725 TII->convertNonUniformLoopRegion(NewSucc, LastMerge); 2726 } 2727 2728 if (Region->isRoot()) { 2729 TII->insertReturn(*LastMerge); 2730 } 2731 2732 DEBUG(Region->getEntry()->getParent()->dump()); 2733 DEBUG(LRegion->print(dbgs(), TRI)); 2734 DEBUG(PHIInfo.dump(MRI)); 2735 2736 DEBUG(dbgs() << "===========If Region End===============\n"); 2737 2738 Region->setLinearizedRegion(LRegion); 2739 return true; 2740 } 2741 2742 bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) { 2743 if (false && regionIsSimpleIf(Region)) { 2744 transformSimpleIfRegion(Region); 2745 return true; 2746 } else if (regionIsSequence(Region)) { 2747 fixupRegionExits(Region); 2748 return false; 2749 } else { 2750 structurizeComplexRegion(Region); 2751 } 2752 return false; 2753 } 2754 2755 static int structurize_once = 0; 2756 2757 bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region, 2758 bool isTopRegion) { 2759 bool Changed = false; 2760 2761 auto Children = Region->getChildren(); 2762 for (auto CI : *Children) { 2763 if (CI->isRegion()) { 2764 Changed |= structurizeRegions(CI->getRegionMRT(), false); 2765 } 2766 } 2767 2768 if (structurize_once < 2 || true) { 2769 Changed |= structurizeRegion(Region); 2770 structurize_once++; 2771 } 2772 return Changed; 2773 } 2774 2775 void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) { 2776 DEBUG(dbgs() << "Fallthrough Map:\n"); 2777 for (auto &MBBI : MF) { 2778 MachineBasicBlock *MBB = MBBI.getFallThrough(); 2779 if (MBB != nullptr) { 2780 DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> " 2781 << MBB->getNumber() << "\n"); 2782 } 2783 FallthroughMap[&MBBI] = MBB; 2784 } 2785 } 2786 2787 void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region, 2788 unsigned SelectOut) { 2789 LinearizedRegion *LRegion = new LinearizedRegion(); 2790 if (SelectOut) { 2791 LRegion->addLiveOut(SelectOut); 2792 DEBUG(dbgs() << "Add LiveOut (BBSelect): " << PrintReg(SelectOut, TRI) 2793 << "\n"); 2794 } 2795 LRegion->setRegionMRT(Region); 2796 Region->setLinearizedRegion(LRegion); 2797 LRegion->setParent(Region->getParent() 2798 ? Region->getParent()->getLinearizedRegion() 2799 : nullptr); 2800 } 2801 2802 unsigned 2803 AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut, 2804 MachineRegisterInfo *MRI, 2805 const SIInstrInfo *TII) { 2806 if (MRT->isRegion()) { 2807 RegionMRT *Region = MRT->getRegionMRT(); 2808 Region->setBBSelectRegOut(SelectOut); 2809 unsigned InnerSelectOut = createBBSelectReg(TII, MRI); 2810 2811 // Fixme: Move linearization creation to the original spot 2812 createLinearizedRegion(Region, SelectOut); 2813 2814 for (auto CI = Region->getChildren()->begin(), 2815 CE = Region->getChildren()->end(); 2816 CI != CE; ++CI) { 2817 InnerSelectOut = 2818 initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII); 2819 } 2820 MRT->setBBSelectRegIn(InnerSelectOut); 2821 return InnerSelectOut; 2822 } else { 2823 MRT->setBBSelectRegOut(SelectOut); 2824 unsigned NewSelectIn = createBBSelectReg(TII, MRI); 2825 MRT->setBBSelectRegIn(NewSelectIn); 2826 return NewSelectIn; 2827 } 2828 } 2829 2830 static void checkRegOnlyPHIInputs(MachineFunction &MF) { 2831 for (auto &MBBI : MF) { 2832 for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(), 2833 E = MBBI.instr_end(); 2834 I != E; ++I) { 2835 MachineInstr &Instr = *I; 2836 if (Instr.isPHI()) { 2837 int numPreds = getPHINumInputs(Instr); 2838 for (int i = 0; i < numPreds; ++i) { 2839 assert(Instr.getOperand(i * 2 + 1).isReg() && 2840 "PHI Operand not a register"); 2841 } 2842 } 2843 } 2844 } 2845 } 2846 2847 2848 INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", 2849 "AMDGPU Machine CFG Structurizer", false, false) 2850 INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass) 2851 INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", 2852 "AMDGPU Machine CFG Structurizer", false, false) 2853 2854 char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID; 2855 2856 2857 bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { 2858 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 2859 const SIInstrInfo *TII = ST.getInstrInfo(); 2860 TRI = ST.getRegisterInfo(); 2861 MRI = &(MF.getRegInfo()); 2862 initFallthroughMap(MF); 2863 2864 checkRegOnlyPHIInputs(MF); 2865 DEBUG(dbgs() << "----STRUCTURIZER START----\n"); 2866 DEBUG(MF.dump()); 2867 2868 Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo()); 2869 DEBUG(Regions->dump()); 2870 2871 RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI); 2872 setRegionMRT(RTree); 2873 initializeSelectRegisters(RTree, 0, MRI, TII); 2874 DEBUG(RTree->dump(TRI)); 2875 bool result = structurizeRegions(RTree, true); 2876 delete RTree; 2877 DEBUG(dbgs() << "----STRUCTURIZER END----\n"); 2878 initFallthroughMap(MF); 2879 return result; 2880 } 2881 2882 FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() { 2883 return new AMDGPUMachineCFGStructurizer(); 2884 } 2885