1 //===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the machine instruction level CFG structurizer pass. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "AMDGPUSubtarget.h" 16 #include "SIInstrInfo.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/PostOrderIterator.h" 19 #include "llvm/ADT/SetVector.h" 20 #include "llvm/ADT/SmallPtrSet.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include "llvm/Analysis/CFG.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineFunctionPass.h" 25 #include "llvm/CodeGen/MachineInstr.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegionInfo.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/IR/DebugLoc.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Target/TargetInstrInfo.h" 33 #include "llvm/Target/TargetLowering.h" 34 #include "llvm/Target/TargetSubtargetInfo.h" 35 #include <tuple> 36 using namespace llvm; 37 38 #define DEBUG_TYPE "amdgpucfgstructurizer" 39 40 namespace { 41 class PHILinearizeDestIterator; 42 43 class PHILinearize { 44 friend class PHILinearizeDestIterator; 45 46 public: 47 typedef std::pair<unsigned, MachineBasicBlock *> PHISourceT; 48 49 private: 50 typedef DenseSet<PHISourceT> PHISourcesT; 51 typedef struct { 52 unsigned DestReg; 53 DebugLoc DL; 54 PHISourcesT Sources; 55 } PHIInfoElementT; 56 typedef SmallPtrSet<PHIInfoElementT *, 2> PHIInfoT; 57 PHIInfoT PHIInfo; 58 59 static unsigned phiInfoElementGetDest(PHIInfoElementT *Info); 60 static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef); 61 static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info); 62 static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg, 63 MachineBasicBlock *SourceMBB); 64 static void phiInfoElementRemoveSource(PHIInfoElementT *Info, 65 unsigned SourceReg, 66 MachineBasicBlock *SourceMBB); 67 PHIInfoElementT *findPHIInfoElement(unsigned DestReg); 68 PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg, 69 MachineBasicBlock *SourceMBB); 70 71 public: 72 bool findSourcesFromMBB(MachineBasicBlock *SourceMBB, 73 SmallVector<unsigned, 4> &Sources); 74 void addDest(unsigned DestReg, const DebugLoc &DL); 75 void replaceDef(unsigned OldDestReg, unsigned NewDestReg); 76 void deleteDef(unsigned DestReg); 77 void addSource(unsigned DestReg, unsigned SourceReg, 78 MachineBasicBlock *SourceMBB); 79 void removeSource(unsigned DestReg, unsigned SourceReg, 80 MachineBasicBlock *SourceMBB = nullptr); 81 bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, 82 unsigned &DestReg); 83 bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr); 84 unsigned getNumSources(unsigned DestReg); 85 void dump(MachineRegisterInfo *MRI); 86 void clear(); 87 88 typedef PHISourcesT::iterator source_iterator; 89 typedef PHILinearizeDestIterator dest_iterator; 90 91 dest_iterator dests_begin(); 92 dest_iterator dests_end(); 93 94 source_iterator sources_begin(unsigned Reg); 95 source_iterator sources_end(unsigned Reg); 96 }; 97 98 class PHILinearizeDestIterator { 99 private: 100 PHILinearize::PHIInfoT::iterator Iter; 101 102 public: 103 unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); } 104 PHILinearizeDestIterator &operator++() { 105 ++Iter; 106 return *this; 107 } 108 bool operator==(const PHILinearizeDestIterator &I) const { 109 return I.Iter == Iter; 110 } 111 bool operator!=(const PHILinearizeDestIterator &I) const { 112 return I.Iter != Iter; 113 } 114 115 PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {} 116 }; 117 118 unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) { 119 return Info->DestReg; 120 } 121 122 void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info, 123 unsigned NewDef) { 124 Info->DestReg = NewDef; 125 } 126 127 PHILinearize::PHISourcesT & 128 PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) { 129 return Info->Sources; 130 } 131 132 void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info, 133 unsigned SourceReg, 134 MachineBasicBlock *SourceMBB) { 135 // Assertion ensures we don't use the same SourceMBB for the 136 // sources, because we cannot have different registers with 137 // identical predecessors, but we can have the same register for 138 // multiple predecessors. 139 #if !defined(NDEBUG) 140 for (auto SI : phiInfoElementGetSources(Info)) { 141 assert((SI.second != SourceMBB || SourceReg == SI.first)); 142 } 143 #endif 144 145 phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB)); 146 } 147 148 void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info, 149 unsigned SourceReg, 150 MachineBasicBlock *SourceMBB) { 151 auto &Sources = phiInfoElementGetSources(Info); 152 SmallVector<PHISourceT, 4> ElimiatedSources; 153 for (auto SI : Sources) { 154 if (SI.first == SourceReg && 155 (SI.second == nullptr || SI.second == SourceMBB)) { 156 ElimiatedSources.push_back(PHISourceT(SI.first, SI.second)); 157 } 158 } 159 160 for (auto &Source : ElimiatedSources) { 161 Sources.erase(Source); 162 } 163 } 164 165 PHILinearize::PHIInfoElementT * 166 PHILinearize::findPHIInfoElement(unsigned DestReg) { 167 for (auto I : PHIInfo) { 168 if (phiInfoElementGetDest(I) == DestReg) { 169 return I; 170 } 171 } 172 return nullptr; 173 } 174 175 PHILinearize::PHIInfoElementT * 176 PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg, 177 MachineBasicBlock *SourceMBB) { 178 for (auto I : PHIInfo) { 179 for (auto SI : phiInfoElementGetSources(I)) { 180 if (SI.first == SourceReg && 181 (SI.second == nullptr || SI.second == SourceMBB)) { 182 return I; 183 } 184 } 185 } 186 return nullptr; 187 } 188 189 bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB, 190 SmallVector<unsigned, 4> &Sources) { 191 bool FoundSource = false; 192 for (auto I : PHIInfo) { 193 for (auto SI : phiInfoElementGetSources(I)) { 194 if (SI.second == SourceMBB) { 195 FoundSource = true; 196 Sources.push_back(SI.first); 197 } 198 } 199 } 200 return FoundSource; 201 } 202 203 void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) { 204 assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists"); 205 PHISourcesT EmptySet; 206 PHIInfoElementT *NewElement = new PHIInfoElementT(); 207 NewElement->DestReg = DestReg; 208 NewElement->DL = DL; 209 NewElement->Sources = EmptySet; 210 PHIInfo.insert(NewElement); 211 } 212 213 void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) { 214 phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg); 215 } 216 217 void PHILinearize::deleteDef(unsigned DestReg) { 218 PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg); 219 PHIInfo.erase(InfoElement); 220 delete InfoElement; 221 } 222 223 void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg, 224 MachineBasicBlock *SourceMBB) { 225 phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); 226 } 227 228 void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg, 229 MachineBasicBlock *SourceMBB) { 230 phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); 231 } 232 233 bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, 234 unsigned &DestReg) { 235 PHIInfoElementT *InfoElement = 236 findPHIInfoElementFromSource(SourceReg, SourceMBB); 237 if (InfoElement != nullptr) { 238 DestReg = phiInfoElementGetDest(InfoElement); 239 return true; 240 } 241 return false; 242 } 243 244 bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) { 245 unsigned DestReg; 246 return findDest(Reg, SourceMBB, DestReg); 247 } 248 249 unsigned PHILinearize::getNumSources(unsigned DestReg) { 250 return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size(); 251 } 252 253 void PHILinearize::dump(MachineRegisterInfo *MRI) { 254 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); 255 dbgs() << "=PHIInfo Start=\n"; 256 for (auto PII : this->PHIInfo) { 257 PHIInfoElementT &Element = *PII; 258 dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI) 259 << " Sources: {"; 260 for (auto &SI : Element.Sources) { 261 dbgs() << PrintReg(SI.first, TRI) << "(BB#" 262 << SI.second->getNumber() << "),"; 263 } 264 dbgs() << "}\n"; 265 } 266 dbgs() << "=PHIInfo End=\n"; 267 } 268 269 void PHILinearize::clear() { PHIInfo = PHIInfoT(); } 270 271 PHILinearize::dest_iterator PHILinearize::dests_begin() { 272 return PHILinearizeDestIterator(PHIInfo.begin()); 273 } 274 275 PHILinearize::dest_iterator PHILinearize::dests_end() { 276 return PHILinearizeDestIterator(PHIInfo.end()); 277 } 278 279 PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) { 280 auto InfoElement = findPHIInfoElement(Reg); 281 return phiInfoElementGetSources(InfoElement).begin(); 282 } 283 PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) { 284 auto InfoElement = findPHIInfoElement(Reg); 285 return phiInfoElementGetSources(InfoElement).end(); 286 } 287 288 class RegionMRT; 289 class MBBMRT; 290 291 static unsigned getPHINumInputs(MachineInstr &PHI) { 292 assert(PHI.isPHI()); 293 return (PHI.getNumOperands() - 1) / 2; 294 } 295 296 static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) { 297 assert(PHI.isPHI()); 298 return PHI.getOperand(Index * 2 + 2).getMBB(); 299 } 300 301 static void setPhiPred(MachineInstr &PHI, unsigned Index, 302 MachineBasicBlock *NewPred) { 303 PHI.getOperand(Index * 2 + 2).setMBB(NewPred); 304 } 305 306 static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) { 307 assert(PHI.isPHI()); 308 return PHI.getOperand(Index * 2 + 1).getReg(); 309 } 310 311 static unsigned getPHIDestReg(MachineInstr &PHI) { 312 assert(PHI.isPHI()); 313 return PHI.getOperand(0).getReg(); 314 } 315 316 class LinearizedRegion { 317 protected: 318 MachineBasicBlock *Entry; 319 // The exit block is part of the region, and is the last 320 // merge block before exiting the region. 321 MachineBasicBlock *Exit; 322 DenseSet<unsigned> LiveOuts; 323 SmallPtrSet<MachineBasicBlock *, 1> MBBs; 324 bool HasLoop; 325 LinearizedRegion *Parent; 326 RegionMRT *RMRT; 327 328 void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, 329 MachineInstr *DefInstr, const MachineRegisterInfo *MRI, 330 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 331 332 void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, 333 MachineInstr *DefInstr, 334 const MachineRegisterInfo *MRI, 335 const TargetRegisterInfo *TRI, 336 PHILinearize &PHIInfo); 337 338 void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 339 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, 340 RegionMRT *TopRegion); 341 342 void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 343 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 344 345 void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI, 346 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, 347 RegionMRT *TopRegion = nullptr); 348 349 public: 350 void setRegionMRT(RegionMRT *Region) { RMRT = Region; } 351 352 RegionMRT *getRegionMRT() { return RMRT; } 353 354 void setParent(LinearizedRegion *P) { Parent = P; } 355 356 LinearizedRegion *getParent() { return Parent; } 357 358 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr); 359 360 void setBBSelectRegIn(unsigned Reg); 361 362 unsigned getBBSelectRegIn(); 363 364 void setBBSelectRegOut(unsigned Reg, bool IsLiveOut); 365 366 unsigned getBBSelectRegOut(); 367 368 void setHasLoop(bool Value); 369 370 bool getHasLoop(); 371 372 void addLiveOut(unsigned VReg); 373 374 void removeLiveOut(unsigned Reg); 375 376 void replaceLiveOut(unsigned OldReg, unsigned NewReg); 377 378 void replaceRegister(unsigned Register, unsigned NewRegister, 379 MachineRegisterInfo *MRI, bool ReplaceInside, 380 bool ReplaceOutside, bool IncludeLoopPHIs); 381 382 void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister, 383 bool IncludeLoopPHIs, 384 MachineRegisterInfo *MRI); 385 386 void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister, 387 bool IncludeLoopPHIs, 388 MachineRegisterInfo *MRI); 389 390 DenseSet<unsigned> *getLiveOuts(); 391 392 void setEntry(MachineBasicBlock *NewEntry); 393 394 MachineBasicBlock *getEntry(); 395 396 void setExit(MachineBasicBlock *NewExit); 397 398 MachineBasicBlock *getExit(); 399 400 void addMBB(MachineBasicBlock *MBB); 401 402 void addMBBs(LinearizedRegion *InnerRegion); 403 404 bool contains(MachineBasicBlock *MBB); 405 406 bool isLiveOut(unsigned Reg); 407 408 bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI); 409 410 void removeFalseRegisterKills(MachineRegisterInfo *MRI); 411 412 void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI, 413 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 414 415 LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 416 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 417 418 LinearizedRegion(); 419 420 ~LinearizedRegion(); 421 }; 422 423 class MRT { 424 protected: 425 RegionMRT *Parent; 426 unsigned BBSelectRegIn; 427 unsigned BBSelectRegOut; 428 429 public: 430 unsigned getBBSelectRegIn() { return BBSelectRegIn; } 431 432 unsigned getBBSelectRegOut() { return BBSelectRegOut; } 433 434 void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; } 435 436 void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; } 437 438 virtual RegionMRT *getRegionMRT() { return nullptr; } 439 440 virtual MBBMRT *getMBBMRT() { return nullptr; } 441 442 bool isRegion() { return getRegionMRT() != nullptr; } 443 444 bool isMBB() { return getMBBMRT() != nullptr; } 445 446 bool isRoot() { return Parent == nullptr; } 447 448 void setParent(RegionMRT *Region) { Parent = Region; } 449 450 RegionMRT *getParent() { return Parent; } 451 452 static MachineBasicBlock * 453 initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, 454 DenseMap<MachineRegion *, RegionMRT *> &RegionMap); 455 456 static RegionMRT *buildMRT(MachineFunction &MF, 457 const MachineRegionInfo *RegionInfo, 458 const SIInstrInfo *TII, 459 MachineRegisterInfo *MRI); 460 461 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0; 462 463 void dumpDepth(int depth) { 464 for (int i = depth; i > 0; --i) { 465 dbgs() << " "; 466 } 467 } 468 469 virtual ~MRT() {} 470 }; 471 472 class MBBMRT : public MRT { 473 MachineBasicBlock *MBB; 474 475 public: 476 virtual MBBMRT *getMBBMRT() { return this; } 477 478 MachineBasicBlock *getMBB() { return MBB; } 479 480 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { 481 dumpDepth(depth); 482 dbgs() << "MBB: " << getMBB()->getNumber(); 483 dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); 484 dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; 485 } 486 487 MBBMRT(MachineBasicBlock *BB) : MBB(BB) { 488 setParent(nullptr); 489 setBBSelectRegOut(0); 490 setBBSelectRegIn(0); 491 } 492 }; 493 494 class RegionMRT : public MRT { 495 protected: 496 MachineRegion *Region; 497 LinearizedRegion *LRegion; 498 MachineBasicBlock *Succ; 499 500 SetVector<MRT *> Children; 501 502 public: 503 virtual RegionMRT *getRegionMRT() { return this; } 504 505 void setLinearizedRegion(LinearizedRegion *LinearizeRegion) { 506 LRegion = LinearizeRegion; 507 } 508 509 LinearizedRegion *getLinearizedRegion() { return LRegion; } 510 511 MachineRegion *getMachineRegion() { return Region; } 512 513 unsigned getInnerOutputRegister() { 514 return (*(Children.begin()))->getBBSelectRegOut(); 515 } 516 517 void addChild(MRT *Tree) { Children.insert(Tree); } 518 519 SetVector<MRT *> *getChildren() { return &Children; } 520 521 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { 522 dumpDepth(depth); 523 dbgs() << "Region: " << (void *)Region; 524 dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); 525 dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; 526 527 dumpDepth(depth); 528 if (getSucc()) 529 dbgs() << "Succ: " << getSucc()->getNumber() << "\n"; 530 else 531 dbgs() << "Succ: none \n"; 532 for (auto MRTI : Children) { 533 MRTI->dump(TRI, depth + 1); 534 } 535 } 536 537 MRT *getEntryTree() { return Children.back(); } 538 539 MRT *getExitTree() { return Children.front(); } 540 541 MachineBasicBlock *getEntry() { 542 MRT *Tree = Children.back(); 543 return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry() 544 : Tree->getMBBMRT()->getMBB(); 545 } 546 547 MachineBasicBlock *getExit() { 548 MRT *Tree = Children.front(); 549 return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit() 550 : Tree->getMBBMRT()->getMBB(); 551 } 552 553 void setSucc(MachineBasicBlock *MBB) { Succ = MBB; } 554 555 MachineBasicBlock *getSucc() { return Succ; } 556 557 bool contains(MachineBasicBlock *MBB) { 558 for (auto CI : Children) { 559 if (CI->isMBB()) { 560 if (MBB == CI->getMBBMRT()->getMBB()) { 561 return true; 562 } 563 } else { 564 if (CI->getRegionMRT()->contains(MBB)) { 565 return true; 566 } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr && 567 CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) { 568 return true; 569 } 570 } 571 } 572 return false; 573 } 574 575 void replaceLiveOutReg(unsigned Register, unsigned NewRegister) { 576 LinearizedRegion *LRegion = getLinearizedRegion(); 577 LRegion->replaceLiveOut(Register, NewRegister); 578 for (auto &CI : Children) { 579 if (CI->isRegion()) { 580 CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister); 581 } 582 } 583 } 584 585 RegionMRT(MachineRegion *MachineRegion) 586 : Region(MachineRegion), LRegion(nullptr), Succ(nullptr) { 587 setParent(nullptr); 588 setBBSelectRegOut(0); 589 setBBSelectRegIn(0); 590 } 591 592 virtual ~RegionMRT() { 593 if (LRegion) { 594 delete LRegion; 595 } 596 597 for (auto CI : Children) { 598 delete &(*CI); 599 } 600 } 601 }; 602 603 static unsigned createBBSelectReg(const SIInstrInfo *TII, 604 MachineRegisterInfo *MRI) { 605 return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32)); 606 } 607 608 MachineBasicBlock * 609 MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, 610 DenseMap<MachineRegion *, RegionMRT *> &RegionMap) { 611 for (auto &MFI : MF) { 612 MachineBasicBlock *ExitMBB = &MFI; 613 if (ExitMBB->succ_size() == 0) { 614 return ExitMBB; 615 } 616 } 617 llvm_unreachable("CFG has no exit block"); 618 return nullptr; 619 } 620 621 RegionMRT *MRT::buildMRT(MachineFunction &MF, 622 const MachineRegionInfo *RegionInfo, 623 const SIInstrInfo *TII, MachineRegisterInfo *MRI) { 624 SmallPtrSet<MachineRegion *, 4> PlacedRegions; 625 DenseMap<MachineRegion *, RegionMRT *> RegionMap; 626 MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion(); 627 RegionMRT *Result = new RegionMRT(TopLevelRegion); 628 RegionMap[TopLevelRegion] = Result; 629 630 // Insert the exit block first, we need it to be the merge node 631 // for the top level region. 632 MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap); 633 634 unsigned BBSelectRegIn = createBBSelectReg(TII, MRI); 635 MBBMRT *ExitMRT = new MBBMRT(Exit); 636 RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT); 637 ExitMRT->setBBSelectRegIn(BBSelectRegIn); 638 639 for (auto MBBI : post_order(&(MF.front()))) { 640 MachineBasicBlock *MBB = &(*MBBI); 641 642 // Skip Exit since we already added it 643 if (MBB == Exit) { 644 continue; 645 } 646 647 DEBUG(dbgs() << "Visiting BB#" << MBB->getNumber() << "\n"); 648 MBBMRT *NewMBB = new MBBMRT(MBB); 649 MachineRegion *Region = RegionInfo->getRegionFor(MBB); 650 651 // Ensure we have the MRT region 652 if (RegionMap.count(Region) == 0) { 653 RegionMRT *NewMRTRegion = new RegionMRT(Region); 654 RegionMap[Region] = NewMRTRegion; 655 656 // Ensure all parents are in the RegionMap 657 MachineRegion *Parent = Region->getParent(); 658 while (RegionMap.count(Parent) == 0) { 659 RegionMRT *NewMRTParent = new RegionMRT(Parent); 660 NewMRTParent->addChild(NewMRTRegion); 661 NewMRTRegion->setParent(NewMRTParent); 662 RegionMap[Parent] = NewMRTParent; 663 NewMRTRegion = NewMRTParent; 664 Parent = Parent->getParent(); 665 } 666 RegionMap[Parent]->addChild(NewMRTRegion); 667 NewMRTRegion->setParent(RegionMap[Parent]); 668 } 669 670 // Add MBB to Region MRT 671 RegionMap[Region]->addChild(NewMBB); 672 NewMBB->setParent(RegionMap[Region]); 673 RegionMap[Region]->setSucc(Region->getExit()); 674 } 675 return Result; 676 } 677 678 void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, 679 MachineInstr *DefInstr, 680 const MachineRegisterInfo *MRI, 681 const TargetRegisterInfo *TRI, 682 PHILinearize &PHIInfo) { 683 if (TRI->isVirtualRegister(Reg)) { 684 DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); 685 // If this is a source register to a PHI we are chaining, it 686 // must be live out. 687 if (PHIInfo.isSource(Reg)) { 688 DEBUG(dbgs() << "Add LiveOut (PHI): " << PrintReg(Reg, TRI) << "\n"); 689 addLiveOut(Reg); 690 } else { 691 // If this is live out of the MBB 692 for (auto &UI : MRI->use_operands(Reg)) { 693 if (UI.getParent()->getParent() != MBB) { 694 DEBUG(dbgs() << "Add LiveOut (MBB BB#" << MBB->getNumber() 695 << "): " << PrintReg(Reg, TRI) << "\n"); 696 addLiveOut(Reg); 697 } else { 698 // If the use is in the same MBB we have to make sure 699 // it is after the def, otherwise it is live out in a loop 700 MachineInstr *UseInstr = UI.getParent(); 701 for (MachineBasicBlock::instr_iterator 702 MII = UseInstr->getIterator(), 703 MIE = UseInstr->getParent()->instr_end(); 704 MII != MIE; ++MII) { 705 if ((&(*MII)) == DefInstr) { 706 DEBUG(dbgs() << "Add LiveOut (Loop): " << PrintReg(Reg, TRI) 707 << "\n"); 708 addLiveOut(Reg); 709 } 710 } 711 } 712 } 713 } 714 } 715 } 716 717 void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, 718 MachineInstr *DefInstr, 719 const MachineRegisterInfo *MRI, 720 const TargetRegisterInfo *TRI, 721 PHILinearize &PHIInfo) { 722 if (TRI->isVirtualRegister(Reg)) { 723 DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); 724 for (auto &UI : MRI->use_operands(Reg)) { 725 if (!Region->contains(UI.getParent()->getParent())) { 726 DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region 727 << "): " << PrintReg(Reg, TRI) << "\n"); 728 addLiveOut(Reg); 729 } 730 } 731 } 732 } 733 734 void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, 735 const MachineRegisterInfo *MRI, 736 const TargetRegisterInfo *TRI, 737 PHILinearize &PHIInfo) { 738 DEBUG(dbgs() << "-Store Live Outs Begin (BB#" << MBB->getNumber() << ")-\n"); 739 for (auto &II : *MBB) { 740 for (auto &RI : II.defs()) { 741 storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo); 742 } 743 for (auto &IRI : II.implicit_operands()) { 744 if (IRI.isDef()) { 745 storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo); 746 } 747 } 748 } 749 750 // If we have a successor with a PHI, source coming from this MBB we have to 751 // add the register as live out 752 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 753 E = MBB->succ_end(); 754 SI != E; ++SI) { 755 for (auto &II : *(*SI)) { 756 if (II.isPHI()) { 757 MachineInstr &PHI = II; 758 int numPreds = getPHINumInputs(PHI); 759 for (int i = 0; i < numPreds; ++i) { 760 if (getPHIPred(PHI, i) == MBB) { 761 unsigned PHIReg = getPHISourceReg(PHI, i); 762 DEBUG(dbgs() << "Add LiveOut (PhiSource BB#" << MBB->getNumber() 763 << " -> BB#" << (*SI)->getNumber() 764 << "): " << PrintReg(PHIReg, TRI) << "\n"); 765 addLiveOut(PHIReg); 766 } 767 } 768 } 769 } 770 } 771 772 DEBUG(dbgs() << "-Store Live Outs Endn-\n"); 773 } 774 775 void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB, 776 const MachineRegisterInfo *MRI, 777 const TargetRegisterInfo *TRI, 778 PHILinearize &PHIInfo, 779 RegionMRT *TopRegion) { 780 for (auto &II : *MBB) { 781 for (auto &RI : II.defs()) { 782 storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI, 783 PHIInfo); 784 } 785 for (auto &IRI : II.implicit_operands()) { 786 if (IRI.isDef()) { 787 storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI, 788 TRI, PHIInfo); 789 } 790 } 791 } 792 } 793 794 void LinearizedRegion::storeLiveOuts(RegionMRT *Region, 795 const MachineRegisterInfo *MRI, 796 const TargetRegisterInfo *TRI, 797 PHILinearize &PHIInfo, 798 RegionMRT *CurrentTopRegion) { 799 MachineBasicBlock *Exit = Region->getSucc(); 800 801 RegionMRT *TopRegion = 802 CurrentTopRegion == nullptr ? Region : CurrentTopRegion; 803 804 // Check if exit is end of function, if so, no live outs. 805 if (Exit == nullptr) 806 return; 807 808 auto Children = Region->getChildren(); 809 for (auto CI : *Children) { 810 if (CI->isMBB()) { 811 auto MBB = CI->getMBBMRT()->getMBB(); 812 storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion); 813 } else { 814 LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion(); 815 // We should be limited to only store registers that are live out from the 816 // lineaized region 817 for (auto MBBI : SubRegion->MBBs) { 818 storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion); 819 } 820 } 821 } 822 823 if (CurrentTopRegion == nullptr) { 824 auto Succ = Region->getSucc(); 825 for (auto &II : *Succ) { 826 if (II.isPHI()) { 827 MachineInstr &PHI = II; 828 int numPreds = getPHINumInputs(PHI); 829 for (int i = 0; i < numPreds; ++i) { 830 if (Region->contains(getPHIPred(PHI, i))) { 831 unsigned PHIReg = getPHISourceReg(PHI, i); 832 DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region 833 << "): " << PrintReg(PHIReg, TRI) << "\n"); 834 addLiveOut(PHIReg); 835 } 836 } 837 } 838 } 839 } 840 } 841 842 void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { 843 OS << "Linearized Region {"; 844 bool IsFirst = true; 845 for (const auto &MBB : MBBs) { 846 if (IsFirst) { 847 IsFirst = false; 848 } else { 849 OS << " ,"; 850 } 851 OS << MBB->getNumber(); 852 } 853 OS << "} (" << Entry->getNumber() << ", " 854 << (Exit == nullptr ? -1 : Exit->getNumber()) 855 << "): In:" << PrintReg(getBBSelectRegIn(), TRI) 856 << " Out:" << PrintReg(getBBSelectRegOut(), TRI) << " {"; 857 for (auto &LI : LiveOuts) { 858 OS << PrintReg(LI, TRI) << " "; 859 } 860 OS << "} \n"; 861 } 862 863 unsigned LinearizedRegion::getBBSelectRegIn() { 864 return getRegionMRT()->getBBSelectRegIn(); 865 } 866 867 unsigned LinearizedRegion::getBBSelectRegOut() { 868 return getRegionMRT()->getBBSelectRegOut(); 869 } 870 871 void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; } 872 873 bool LinearizedRegion::getHasLoop() { return HasLoop; } 874 875 void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); } 876 877 void LinearizedRegion::removeLiveOut(unsigned Reg) { 878 if (isLiveOut(Reg)) 879 LiveOuts.erase(Reg); 880 } 881 882 void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) { 883 if (isLiveOut(OldReg)) { 884 removeLiveOut(OldReg); 885 addLiveOut(NewReg); 886 } 887 } 888 889 void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister, 890 MachineRegisterInfo *MRI, 891 bool ReplaceInside, bool ReplaceOutside, 892 bool IncludeLoopPHI) { 893 assert(Register != NewRegister && "Cannot replace a reg with itself"); 894 895 DEBUG(dbgs() << "Pepareing to replace register (region): " 896 << PrintReg(Register, MRI->getTargetRegisterInfo()) << " with " 897 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); 898 899 // If we are replacing outside, we also need to update the LiveOuts 900 if (ReplaceOutside && 901 (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) { 902 LinearizedRegion *Current = this; 903 while (Current != nullptr && Current->getEntry() != nullptr) { 904 DEBUG(dbgs() << "Region before register replace\n"); 905 DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); 906 Current->replaceLiveOut(Register, NewRegister); 907 DEBUG(dbgs() << "Region after register replace\n"); 908 DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); 909 Current = Current->getParent(); 910 } 911 } 912 913 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), 914 E = MRI->reg_end(); 915 I != E;) { 916 MachineOperand &O = *I; 917 ++I; 918 919 // We don't rewrite defs. 920 if (O.isDef()) 921 continue; 922 923 bool IsInside = contains(O.getParent()->getParent()); 924 bool IsLoopPHI = IsInside && (O.getParent()->isPHI() && 925 O.getParent()->getParent() == getEntry()); 926 bool ShouldReplace = (IsInside && ReplaceInside) || 927 (!IsInside && ReplaceOutside) || 928 (IncludeLoopPHI && IsLoopPHI); 929 if (ShouldReplace) { 930 931 if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { 932 DEBUG(dbgs() << "Trying to substitute physical register: " 933 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 934 << "\n"); 935 llvm_unreachable("Cannot substitute physical registers"); 936 } else { 937 DEBUG(dbgs() << "Replacing register (region): " 938 << PrintReg(Register, MRI->getTargetRegisterInfo()) 939 << " with " 940 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 941 << "\n"); 942 O.setReg(NewRegister); 943 } 944 } 945 } 946 } 947 948 void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register, 949 unsigned NewRegister, 950 bool IncludeLoopPHIs, 951 MachineRegisterInfo *MRI) { 952 replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs); 953 } 954 955 void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register, 956 unsigned NewRegister, 957 bool IncludeLoopPHIs, 958 MachineRegisterInfo *MRI) { 959 replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs); 960 } 961 962 DenseSet<unsigned> *LinearizedRegion::getLiveOuts() { return &LiveOuts; } 963 964 void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) { 965 Entry = NewEntry; 966 } 967 968 MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; } 969 970 void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; } 971 972 MachineBasicBlock *LinearizedRegion::getExit() { return Exit; } 973 974 void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); } 975 976 void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) { 977 for (const auto &MBB : InnerRegion->MBBs) { 978 addMBB(MBB); 979 } 980 } 981 982 bool LinearizedRegion::contains(MachineBasicBlock *MBB) { 983 return MBBs.count(MBB) == 1; 984 } 985 986 bool LinearizedRegion::isLiveOut(unsigned Reg) { 987 return LiveOuts.count(Reg) == 1; 988 } 989 990 bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) { 991 return MRI->def_begin(Reg) == MRI->def_end(); 992 } 993 994 // After the code has been structurized, what was flagged as kills 995 // before are no longer register kills. 996 void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { 997 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); 998 for (auto MBBI : MBBs) { 999 MachineBasicBlock *MBB = MBBI; 1000 for (auto &II : *MBB) { 1001 for (auto &RI : II.uses()) { 1002 if (RI.isReg()) { 1003 unsigned Reg = RI.getReg(); 1004 if (TRI->isVirtualRegister(Reg)) { 1005 if (hasNoDef(Reg, MRI)) 1006 continue; 1007 if (!MRI->hasOneDef(Reg)) { 1008 DEBUG(this->getEntry()->getParent()->dump()); 1009 DEBUG(dbgs() << PrintReg(Reg, TRI) << "\n"); 1010 } 1011 1012 if (MRI->def_begin(Reg) == MRI->def_end()) { 1013 DEBUG(dbgs() << "Register " 1014 << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1015 << " has NO defs\n"); 1016 } else if (!MRI->hasOneDef(Reg)) { 1017 DEBUG(dbgs() << "Register " 1018 << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1019 << " has multiple defs\n"); 1020 } 1021 1022 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); 1023 MachineOperand *Def = &(*(MRI->def_begin(Reg))); 1024 MachineOperand *UseOperand = &(RI); 1025 bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB; 1026 if (UseIsOutsideDefMBB && UseOperand->isKill()) { 1027 DEBUG(dbgs() << "Removing kill flag on register: " 1028 << PrintReg(Reg, TRI) << "\n"); 1029 UseOperand->setIsKill(false); 1030 } 1031 } 1032 } 1033 } 1034 } 1035 } 1036 } 1037 1038 void LinearizedRegion::initLiveOut(RegionMRT *Region, 1039 const MachineRegisterInfo *MRI, 1040 const TargetRegisterInfo *TRI, 1041 PHILinearize &PHIInfo) { 1042 storeLiveOuts(Region, MRI, TRI, PHIInfo); 1043 } 1044 1045 LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB, 1046 const MachineRegisterInfo *MRI, 1047 const TargetRegisterInfo *TRI, 1048 PHILinearize &PHIInfo) { 1049 setEntry(MBB); 1050 setExit(MBB); 1051 storeLiveOuts(MBB, MRI, TRI, PHIInfo); 1052 MBBs.insert(MBB); 1053 Parent = nullptr; 1054 } 1055 1056 LinearizedRegion::LinearizedRegion() { 1057 setEntry(nullptr); 1058 setExit(nullptr); 1059 Parent = nullptr; 1060 } 1061 1062 LinearizedRegion::~LinearizedRegion() {} 1063 1064 class AMDGPUMachineCFGStructurizer : public MachineFunctionPass { 1065 private: 1066 const MachineRegionInfo *Regions; 1067 const SIInstrInfo *TII; 1068 const TargetRegisterInfo *TRI; 1069 MachineRegisterInfo *MRI; 1070 unsigned BBSelectRegister; 1071 PHILinearize PHIInfo; 1072 DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap; 1073 1074 void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI, 1075 SmallVector<unsigned, 2> &RegionIndices); 1076 void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, 1077 SmallVector<unsigned, 2> &RegionIndices); 1078 void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, 1079 SmallVector<unsigned, 2> &PHINonRegionIndices); 1080 1081 void storePHILinearizationInfoDest( 1082 unsigned LDestReg, MachineInstr &PHI, 1083 SmallVector<unsigned, 2> *RegionIndices = nullptr); 1084 1085 unsigned storePHILinearizationInfo(MachineInstr &PHI, 1086 SmallVector<unsigned, 2> *RegionIndices); 1087 1088 void extractKilledPHIs(MachineBasicBlock *MBB); 1089 1090 bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices, 1091 unsigned *ReplaceReg); 1092 1093 bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1094 MachineBasicBlock *SourceMBB, 1095 SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg); 1096 1097 void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1098 MachineBasicBlock *LastMerge, 1099 SmallVector<unsigned, 2> &PHIRegionIndices); 1100 void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1101 MachineBasicBlock *IfMBB, 1102 SmallVector<unsigned, 2> &PHIRegionIndices); 1103 void replaceLiveOutRegs(MachineInstr &PHI, 1104 SmallVector<unsigned, 2> &PHIRegionIndices, 1105 unsigned CombinedSourceReg, 1106 LinearizedRegion *LRegion); 1107 void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge, 1108 MachineInstr &PHI, LinearizedRegion *LRegion); 1109 1110 void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge, 1111 LinearizedRegion *LRegion); 1112 void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB, 1113 MachineInstr &PHI); 1114 void rewriteRegionEntryPHIs(LinearizedRegion *Region, 1115 MachineBasicBlock *IfMBB); 1116 1117 bool regionIsSimpleIf(RegionMRT *Region); 1118 1119 void transformSimpleIfRegion(RegionMRT *Region); 1120 1121 void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II); 1122 1123 void insertUnconditionalBranch(MachineBasicBlock *MBB, 1124 MachineBasicBlock *Dest, 1125 const DebugLoc &DL = DebugLoc()); 1126 1127 MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region); 1128 1129 void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1130 MachineBasicBlock *MergeBB, unsigned DestRegister, 1131 unsigned IfSourceRegister, unsigned CodeSourceRegister, 1132 bool IsUndefIfSource = false); 1133 1134 MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB, 1135 MachineBasicBlock *CodeBBStart, 1136 MachineBasicBlock *CodeBBEnd, 1137 MachineBasicBlock *SelectBB, unsigned IfReg, 1138 bool InheritPreds); 1139 1140 void prunePHIInfo(MachineBasicBlock *MBB); 1141 void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg); 1142 1143 void createEntryPHIs(LinearizedRegion *CurrentRegion); 1144 void resolvePHIInfos(MachineBasicBlock *FunctionEntry); 1145 1146 void replaceRegisterWith(unsigned Register, unsigned NewRegister); 1147 1148 MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB, 1149 MachineBasicBlock *CodeBB, 1150 LinearizedRegion *LRegion, 1151 unsigned BBSelectRegIn, 1152 unsigned BBSelectRegOut); 1153 1154 MachineBasicBlock * 1155 createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion, 1156 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, 1157 unsigned BBSelectRegIn, unsigned BBSelectRegOut); 1158 void ensureCondIsNotKilled(SmallVector<MachineOperand, 1> Cond); 1159 1160 void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, 1161 MachineBasicBlock *MergeBB, 1162 unsigned BBSelectReg); 1163 1164 MachineInstr *getDefInstr(unsigned Reg); 1165 void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1166 MachineBasicBlock *MergeBB, 1167 LinearizedRegion *InnerRegion, unsigned DestReg, 1168 unsigned SourceReg); 1169 bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion, 1170 unsigned Register); 1171 void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1172 MachineBasicBlock *MergeBB, 1173 LinearizedRegion *InnerRegion, 1174 LinearizedRegion *LRegion); 1175 1176 void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry, 1177 MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion); 1178 void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc, 1179 LinearizedRegion *LRegion); 1180 1181 MachineBasicBlock *splitExit(LinearizedRegion *LRegion); 1182 1183 MachineBasicBlock *splitEntry(LinearizedRegion *LRegion); 1184 1185 LinearizedRegion *initLinearizedRegion(RegionMRT *Region); 1186 1187 bool structurizeComplexRegion(RegionMRT *Region); 1188 1189 bool structurizeRegion(RegionMRT *Region); 1190 1191 bool structurizeRegions(RegionMRT *Region, bool isTopRegion); 1192 1193 public: 1194 static char ID; 1195 1196 void getAnalysisUsage(AnalysisUsage &AU) const override { 1197 AU.addRequired<MachineRegionInfoPass>(); 1198 MachineFunctionPass::getAnalysisUsage(AU); 1199 } 1200 1201 AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) { 1202 initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); 1203 } 1204 1205 void initFallthroughMap(MachineFunction &MF); 1206 1207 void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut); 1208 1209 unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg, 1210 MachineRegisterInfo *MRI, 1211 const SIInstrInfo *TII); 1212 1213 RegionMRT *RMRT; 1214 void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; } 1215 1216 RegionMRT *getRegionMRT() { return RMRT; } 1217 1218 bool runOnMachineFunction(MachineFunction &MF) override; 1219 }; 1220 } 1221 1222 char AMDGPUMachineCFGStructurizer::ID = 0; 1223 1224 bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) { 1225 MachineBasicBlock *Entry = Region->getEntry(); 1226 MachineBasicBlock *Succ = Region->getSucc(); 1227 bool FoundBypass = false; 1228 bool FoundIf = false; 1229 1230 if (Entry->succ_size() != 2) { 1231 return false; 1232 } 1233 1234 for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), 1235 E = Entry->succ_end(); 1236 SI != E; ++SI) { 1237 MachineBasicBlock *Current = *SI; 1238 1239 if (Current == Succ) { 1240 FoundBypass = true; 1241 } else if ((Current->succ_size() == 1) && 1242 *(Current->succ_begin()) == Succ) { 1243 FoundIf = true; 1244 } 1245 } 1246 1247 return FoundIf && FoundBypass; 1248 } 1249 1250 void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) { 1251 MachineBasicBlock *Entry = Region->getEntry(); 1252 MachineBasicBlock *Exit = Region->getExit(); 1253 TII->convertNonUniformIfRegion(Entry, Exit); 1254 } 1255 1256 static void fixMBBTerminator(MachineBasicBlock *MBB) { 1257 1258 if (MBB->succ_size() == 1) { 1259 auto *Succ = *(MBB->succ_begin()); 1260 for (auto &TI : MBB->terminators()) { 1261 for (auto &UI : TI.uses()) { 1262 if (UI.isMBB() && UI.getMBB() != Succ) { 1263 UI.setMBB(Succ); 1264 } 1265 } 1266 } 1267 } 1268 } 1269 1270 static void fixRegionTerminator(RegionMRT *Region) { 1271 MachineBasicBlock *InternalSucc = nullptr; 1272 MachineBasicBlock *ExternalSucc = nullptr; 1273 LinearizedRegion *LRegion = Region->getLinearizedRegion(); 1274 auto Exit = LRegion->getExit(); 1275 1276 SmallPtrSet<MachineBasicBlock *, 2> Successors; 1277 for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(), 1278 SE = Exit->succ_end(); 1279 SI != SE; ++SI) { 1280 MachineBasicBlock *Succ = *SI; 1281 if (LRegion->contains(Succ)) { 1282 // Do not allow re-assign 1283 assert(InternalSucc == nullptr); 1284 InternalSucc = Succ; 1285 } else { 1286 // Do not allow re-assign 1287 assert(ExternalSucc == nullptr); 1288 ExternalSucc = Succ; 1289 } 1290 } 1291 1292 for (auto &TI : Exit->terminators()) { 1293 for (auto &UI : TI.uses()) { 1294 if (UI.isMBB()) { 1295 auto Target = UI.getMBB(); 1296 if (Target != InternalSucc && Target != ExternalSucc) { 1297 UI.setMBB(ExternalSucc); 1298 } 1299 } 1300 } 1301 } 1302 } 1303 1304 // If a region region is just a sequence of regions (and the exit 1305 // block in the case of the top level region), we can simply skip 1306 // linearizing it, because it is already linear 1307 bool regionIsSequence(RegionMRT *Region) { 1308 auto Children = Region->getChildren(); 1309 for (auto CI : *Children) { 1310 if (!CI->isRegion()) { 1311 if (CI->getMBBMRT()->getMBB()->succ_size() > 1) { 1312 return false; 1313 } 1314 } 1315 } 1316 return true; 1317 } 1318 1319 void fixupRegionExits(RegionMRT *Region) { 1320 auto Children = Region->getChildren(); 1321 for (auto CI : *Children) { 1322 if (!CI->isRegion()) { 1323 fixMBBTerminator(CI->getMBBMRT()->getMBB()); 1324 } else { 1325 fixRegionTerminator(CI->getRegionMRT()); 1326 } 1327 } 1328 } 1329 1330 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( 1331 RegionMRT *Region, MachineInstr &PHI, 1332 SmallVector<unsigned, 2> &PHIRegionIndices) { 1333 unsigned NumInputs = getPHINumInputs(PHI); 1334 for (unsigned i = 0; i < NumInputs; ++i) { 1335 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1336 if (Region->contains(Pred)) { 1337 PHIRegionIndices.push_back(i); 1338 } 1339 } 1340 } 1341 1342 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( 1343 LinearizedRegion *Region, MachineInstr &PHI, 1344 SmallVector<unsigned, 2> &PHIRegionIndices) { 1345 unsigned NumInputs = getPHINumInputs(PHI); 1346 for (unsigned i = 0; i < NumInputs; ++i) { 1347 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1348 if (Region->contains(Pred)) { 1349 PHIRegionIndices.push_back(i); 1350 } 1351 } 1352 } 1353 1354 void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices( 1355 LinearizedRegion *Region, MachineInstr &PHI, 1356 SmallVector<unsigned, 2> &PHINonRegionIndices) { 1357 unsigned NumInputs = getPHINumInputs(PHI); 1358 for (unsigned i = 0; i < NumInputs; ++i) { 1359 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1360 if (!Region->contains(Pred)) { 1361 PHINonRegionIndices.push_back(i); 1362 } 1363 } 1364 } 1365 1366 void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest( 1367 unsigned LDestReg, MachineInstr &PHI, 1368 SmallVector<unsigned, 2> *RegionIndices) { 1369 if (RegionIndices) { 1370 for (auto i : *RegionIndices) { 1371 PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); 1372 } 1373 } else { 1374 unsigned NumInputs = getPHINumInputs(PHI); 1375 for (unsigned i = 0; i < NumInputs; ++i) { 1376 PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); 1377 } 1378 } 1379 } 1380 1381 unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo( 1382 MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) { 1383 unsigned DestReg = getPHIDestReg(PHI); 1384 unsigned LinearizeDestReg = 1385 MRI->createVirtualRegister(MRI->getRegClass(DestReg)); 1386 PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc()); 1387 storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices); 1388 return LinearizeDestReg; 1389 } 1390 1391 void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) { 1392 // We need to create a new chain for the killed phi, but there is no 1393 // need to do the renaming outside or inside the block. 1394 SmallPtrSet<MachineInstr *, 2> PHIs; 1395 for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), 1396 E = MBB->instr_end(); 1397 I != E; ++I) { 1398 MachineInstr &Instr = *I; 1399 if (Instr.isPHI()) { 1400 unsigned PHIDestReg = getPHIDestReg(Instr); 1401 DEBUG(dbgs() << "Extractking killed phi:\n"); 1402 DEBUG(Instr.dump()); 1403 PHIs.insert(&Instr); 1404 PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc()); 1405 storePHILinearizationInfoDest(PHIDestReg, Instr); 1406 } 1407 } 1408 1409 for (auto PI : PHIs) { 1410 PI->eraseFromParent(); 1411 } 1412 } 1413 1414 static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices, 1415 unsigned Index) { 1416 for (auto i : PHIRegionIndices) { 1417 if (i == Index) 1418 return true; 1419 } 1420 return false; 1421 } 1422 1423 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, 1424 SmallVector<unsigned, 2> &PHIIndices, 1425 unsigned *ReplaceReg) { 1426 return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg); 1427 } 1428 1429 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, 1430 unsigned CombinedSourceReg, 1431 MachineBasicBlock *SourceMBB, 1432 SmallVector<unsigned, 2> &PHIIndices, 1433 unsigned *ReplaceReg) { 1434 DEBUG(dbgs() << "Shrink PHI: "); 1435 DEBUG(PHI.dump()); 1436 DEBUG(dbgs() << " to " << PrintReg(getPHIDestReg(PHI), TRI) 1437 << "<def> = PHI("); 1438 1439 bool Replaced = false; 1440 unsigned NumInputs = getPHINumInputs(PHI); 1441 int SingleExternalEntryIndex = -1; 1442 for (unsigned i = 0; i < NumInputs; ++i) { 1443 if (!isPHIRegionIndex(PHIIndices, i)) { 1444 if (SingleExternalEntryIndex == -1) { 1445 // Single entry 1446 SingleExternalEntryIndex = i; 1447 } else { 1448 // Multiple entries 1449 SingleExternalEntryIndex = -2; 1450 } 1451 } 1452 } 1453 1454 if (SingleExternalEntryIndex > -1) { 1455 *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex); 1456 // We should not rewrite the code, we should only pick up the single value 1457 // that represents the shrunk PHI. 1458 Replaced = true; 1459 } else { 1460 MachineBasicBlock *MBB = PHI.getParent(); 1461 MachineInstrBuilder MIB = 1462 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1463 getPHIDestReg(PHI)); 1464 if (SourceMBB) { 1465 MIB.addReg(CombinedSourceReg); 1466 MIB.addMBB(SourceMBB); 1467 DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" 1468 << SourceMBB->getNumber()); 1469 } 1470 1471 for (unsigned i = 0; i < NumInputs; ++i) { 1472 if (isPHIRegionIndex(PHIIndices, i)) { 1473 continue; 1474 } 1475 unsigned SourceReg = getPHISourceReg(PHI, i); 1476 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1477 MIB.addReg(SourceReg); 1478 MIB.addMBB(SourcePred); 1479 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 1480 << SourcePred->getNumber()); 1481 } 1482 DEBUG(dbgs() << ")\n"); 1483 } 1484 PHI.eraseFromParent(); 1485 return Replaced; 1486 } 1487 1488 void AMDGPUMachineCFGStructurizer::replacePHI( 1489 MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge, 1490 SmallVector<unsigned, 2> &PHIRegionIndices) { 1491 DEBUG(dbgs() << "Replace PHI: "); 1492 DEBUG(PHI.dump()); 1493 DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI) 1494 << "<def> = PHI("); 1495 1496 bool HasExternalEdge = false; 1497 unsigned NumInputs = getPHINumInputs(PHI); 1498 for (unsigned i = 0; i < NumInputs; ++i) { 1499 if (!isPHIRegionIndex(PHIRegionIndices, i)) { 1500 HasExternalEdge = true; 1501 } 1502 } 1503 1504 if (HasExternalEdge) { 1505 MachineBasicBlock *MBB = PHI.getParent(); 1506 MachineInstrBuilder MIB = 1507 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1508 getPHIDestReg(PHI)); 1509 MIB.addReg(CombinedSourceReg); 1510 MIB.addMBB(LastMerge); 1511 DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" 1512 << LastMerge->getNumber()); 1513 for (unsigned i = 0; i < NumInputs; ++i) { 1514 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1515 continue; 1516 } 1517 unsigned SourceReg = getPHISourceReg(PHI, i); 1518 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1519 MIB.addReg(SourceReg); 1520 MIB.addMBB(SourcePred); 1521 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 1522 << SourcePred->getNumber()); 1523 } 1524 DEBUG(dbgs() << ")\n"); 1525 } else { 1526 replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg); 1527 } 1528 PHI.eraseFromParent(); 1529 } 1530 1531 void AMDGPUMachineCFGStructurizer::replaceEntryPHI( 1532 MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB, 1533 SmallVector<unsigned, 2> &PHIRegionIndices) { 1534 1535 DEBUG(dbgs() << "Replace entry PHI: "); 1536 DEBUG(PHI.dump()); 1537 DEBUG(dbgs() << " with "); 1538 1539 unsigned NumInputs = getPHINumInputs(PHI); 1540 unsigned NumNonRegionInputs = NumInputs; 1541 for (unsigned i = 0; i < NumInputs; ++i) { 1542 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1543 NumNonRegionInputs--; 1544 } 1545 } 1546 1547 if (NumNonRegionInputs == 0) { 1548 auto DestReg = getPHIDestReg(PHI); 1549 replaceRegisterWith(DestReg, CombinedSourceReg); 1550 DEBUG(dbgs() << " register " << PrintReg(CombinedSourceReg, TRI) << "\n"); 1551 PHI.eraseFromParent(); 1552 } else { 1553 DEBUG(dbgs() << PrintReg(getPHIDestReg(PHI), TRI) << "<def> = PHI("); 1554 MachineBasicBlock *MBB = PHI.getParent(); 1555 MachineInstrBuilder MIB = 1556 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1557 getPHIDestReg(PHI)); 1558 MIB.addReg(CombinedSourceReg); 1559 MIB.addMBB(IfMBB); 1560 DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" 1561 << IfMBB->getNumber()); 1562 unsigned NumInputs = getPHINumInputs(PHI); 1563 for (unsigned i = 0; i < NumInputs; ++i) { 1564 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1565 continue; 1566 } 1567 unsigned SourceReg = getPHISourceReg(PHI, i); 1568 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1569 MIB.addReg(SourceReg); 1570 MIB.addMBB(SourcePred); 1571 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 1572 << SourcePred->getNumber()); 1573 } 1574 DEBUG(dbgs() << ")\n"); 1575 PHI.eraseFromParent(); 1576 } 1577 } 1578 1579 void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs( 1580 MachineInstr &PHI, SmallVector<unsigned, 2> &PHIRegionIndices, 1581 unsigned CombinedSourceReg, LinearizedRegion *LRegion) { 1582 bool WasLiveOut = false; 1583 for (auto PII : PHIRegionIndices) { 1584 unsigned Reg = getPHISourceReg(PHI, PII); 1585 if (LRegion->isLiveOut(Reg)) { 1586 bool IsDead = true; 1587 1588 // Check if register is live out of the basic block 1589 MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent(); 1590 for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) { 1591 if ((*UI).getParent()->getParent() != DefMBB) { 1592 IsDead = false; 1593 } 1594 } 1595 1596 DEBUG(dbgs() << "Register " << PrintReg(Reg, TRI) << " is " 1597 << (IsDead ? "dead" : "alive") << " after PHI replace\n"); 1598 if (IsDead) { 1599 LRegion->removeLiveOut(Reg); 1600 } 1601 WasLiveOut = true; 1602 } 1603 } 1604 1605 if (WasLiveOut) 1606 LRegion->addLiveOut(CombinedSourceReg); 1607 } 1608 1609 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region, 1610 MachineBasicBlock *LastMerge, 1611 MachineInstr &PHI, 1612 LinearizedRegion *LRegion) { 1613 SmallVector<unsigned, 2> PHIRegionIndices; 1614 getPHIRegionIndices(Region, PHI, PHIRegionIndices); 1615 unsigned LinearizedSourceReg = 1616 storePHILinearizationInfo(PHI, &PHIRegionIndices); 1617 1618 replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices); 1619 replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion); 1620 } 1621 1622 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region, 1623 MachineBasicBlock *IfMBB, 1624 MachineInstr &PHI) { 1625 SmallVector<unsigned, 2> PHINonRegionIndices; 1626 getPHINonRegionIndices(Region, PHI, PHINonRegionIndices); 1627 unsigned LinearizedSourceReg = 1628 storePHILinearizationInfo(PHI, &PHINonRegionIndices); 1629 replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices); 1630 } 1631 1632 static void collectPHIs(MachineBasicBlock *MBB, 1633 SmallVector<MachineInstr *, 2> &PHIs) { 1634 for (auto &BBI : *MBB) { 1635 if (BBI.isPHI()) { 1636 PHIs.push_back(&BBI); 1637 } 1638 } 1639 } 1640 1641 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region, 1642 MachineBasicBlock *LastMerge, 1643 LinearizedRegion *LRegion) { 1644 SmallVector<MachineInstr *, 2> PHIs; 1645 auto Exit = Region->getSucc(); 1646 if (Exit == nullptr) 1647 return; 1648 1649 collectPHIs(Exit, PHIs); 1650 1651 for (auto PHII : PHIs) { 1652 rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion); 1653 } 1654 } 1655 1656 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region, 1657 MachineBasicBlock *IfMBB) { 1658 SmallVector<MachineInstr *, 2> PHIs; 1659 auto Entry = Region->getEntry(); 1660 1661 collectPHIs(Entry, PHIs); 1662 1663 for (auto PHII : PHIs) { 1664 rewriteRegionEntryPHI(Region, IfMBB, *PHII); 1665 } 1666 } 1667 1668 void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB, 1669 MachineBasicBlock *Dest, 1670 const DebugLoc &DL) { 1671 DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber() 1672 << " -> " << Dest->getNumber() << "\n"); 1673 MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator(); 1674 bool HasTerminator = Terminator != MBB->instr_end(); 1675 if (HasTerminator) { 1676 TII->ReplaceTailWithBranchTo(Terminator, Dest); 1677 } 1678 if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) { 1679 TII->insertUnconditionalBranch(*MBB, Dest, DL); 1680 } 1681 } 1682 1683 static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) { 1684 MachineBasicBlock *result = nullptr; 1685 for (auto &MFI : MF) { 1686 if (MFI.succ_size() == 0) { 1687 if (result == nullptr) { 1688 result = &MFI; 1689 } else { 1690 return nullptr; 1691 } 1692 } 1693 } 1694 1695 return result; 1696 } 1697 1698 static bool hasOneExitNode(MachineFunction &MF) { 1699 return getSingleExitNode(MF) != nullptr; 1700 } 1701 1702 MachineBasicBlock * 1703 AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) { 1704 auto Exit = Region->getSucc(); 1705 1706 // If the exit is the end of the function, we just use the existing 1707 MachineFunction *MF = Region->getEntry()->getParent(); 1708 if (Exit == nullptr && hasOneExitNode(*MF)) { 1709 return &(*(--(Region->getEntry()->getParent()->end()))); 1710 } 1711 1712 MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock(); 1713 if (Exit == nullptr) { 1714 MachineFunction::iterator ExitIter = MF->end(); 1715 MF->insert(ExitIter, LastMerge); 1716 } else { 1717 MachineFunction::iterator ExitIter = Exit->getIterator(); 1718 MF->insert(ExitIter, LastMerge); 1719 LastMerge->addSuccessor(Exit); 1720 insertUnconditionalBranch(LastMerge, Exit); 1721 DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n"); 1722 } 1723 return LastMerge; 1724 } 1725 1726 void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB, 1727 MachineBasicBlock *CodeBB, 1728 MachineBasicBlock *MergeBB, 1729 unsigned DestRegister, 1730 unsigned IfSourceRegister, 1731 unsigned CodeSourceRegister, 1732 bool IsUndefIfSource) { 1733 // If this is the function exit block, we don't need a phi. 1734 if (MergeBB->succ_begin() == MergeBB->succ_end()) { 1735 return; 1736 } 1737 DEBUG(dbgs() << "Merge PHI (BB#" << MergeBB->getNumber() 1738 << "): " << PrintReg(DestRegister, TRI) << "<def> = PHI(" 1739 << PrintReg(IfSourceRegister, TRI) << ", BB#" 1740 << IfBB->getNumber() << PrintReg(CodeSourceRegister, TRI) 1741 << ", BB#" << CodeBB->getNumber() << ")\n"); 1742 const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin()); 1743 MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL, 1744 TII->get(TargetOpcode::PHI), DestRegister); 1745 if (IsUndefIfSource && false) { 1746 MIB.addReg(IfSourceRegister, RegState::Undef); 1747 } else { 1748 MIB.addReg(IfSourceRegister); 1749 } 1750 MIB.addMBB(IfBB); 1751 MIB.addReg(CodeSourceRegister); 1752 MIB.addMBB(CodeBB); 1753 } 1754 1755 static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) { 1756 for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), 1757 E = MBB->succ_end(); 1758 PI != E; ++PI) { 1759 if ((*PI) != MBB) { 1760 (MBB)->removeSuccessor(*PI); 1761 } 1762 } 1763 } 1764 1765 static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, 1766 MachineBasicBlock *EndMBB) { 1767 1768 // We have to check against the StartMBB successor becasuse a 1769 // structurized region with a loop will have the entry block split, 1770 // and the backedge will go to the entry successor. 1771 DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs; 1772 unsigned SuccSize = StartMBB->succ_size(); 1773 if (SuccSize > 0) { 1774 MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin()); 1775 for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(), 1776 E = EndMBB->succ_end(); 1777 PI != E; ++PI) { 1778 // Either we have a back-edge to the entry block, or a back-edge to the 1779 // successor of the entry block since the block may be split. 1780 if ((*PI) != StartMBB && 1781 !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) { 1782 Succs.insert( 1783 std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI)); 1784 } 1785 } 1786 } 1787 1788 for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(), 1789 E = StartMBB->pred_end(); 1790 PI != E; ++PI) { 1791 if ((*PI) != EndMBB) { 1792 Succs.insert( 1793 std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB)); 1794 } 1795 } 1796 1797 for (auto SI : Succs) { 1798 std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI; 1799 DEBUG(dbgs() << "Removing edge: BB#" << Edge.first->getNumber() << " -> BB#" 1800 << Edge.second->getNumber() << "\n"); 1801 Edge.first->removeSuccessor(Edge.second); 1802 } 1803 } 1804 1805 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock( 1806 MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart, 1807 MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg, 1808 bool InheritPreds) { 1809 MachineFunction *MF = MergeBB->getParent(); 1810 MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock(); 1811 1812 if (InheritPreds) { 1813 for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(), 1814 E = CodeBBStart->pred_end(); 1815 PI != E; ++PI) { 1816 if ((*PI) != CodeBBEnd) { 1817 MachineBasicBlock *Pred = (*PI); 1818 Pred->addSuccessor(IfBB); 1819 } 1820 } 1821 } 1822 1823 removeExternalCFGEdges(CodeBBStart, CodeBBEnd); 1824 1825 auto CodeBBStartI = CodeBBStart->getIterator(); 1826 auto CodeBBEndI = CodeBBEnd->getIterator(); 1827 auto MergeIter = MergeBB->getIterator(); 1828 MF->insert(MergeIter, IfBB); 1829 MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI); 1830 IfBB->addSuccessor(MergeBB); 1831 IfBB->addSuccessor(CodeBBStart); 1832 1833 DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n"); 1834 // Ensure that the MergeBB is a successor of the CodeEndBB. 1835 if (!CodeBBEnd->isSuccessor(MergeBB)) 1836 CodeBBEnd->addSuccessor(MergeBB); 1837 1838 DEBUG(dbgs() << "Moved MBB#" << CodeBBStart->getNumber() << " through MBB#" 1839 << CodeBBEnd->getNumber() << "\n"); 1840 1841 // If we have a single predecessor we can find a reasonable debug location 1842 MachineBasicBlock *SinglePred = 1843 CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr; 1844 const DebugLoc &DL = SinglePred 1845 ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator()) 1846 : DebugLoc(); 1847 1848 unsigned Reg = 1849 TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg, 1850 SelectBB->getNumber() /* CodeBBStart->getNumber() */); 1851 if (&(*(IfBB->getParent()->begin())) == IfBB) { 1852 TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg, 1853 CodeBBStart->getNumber()); 1854 } 1855 MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); 1856 ArrayRef<MachineOperand> Cond(RegOp); 1857 TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL); 1858 1859 return IfBB; 1860 } 1861 1862 void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled( 1863 SmallVector<MachineOperand, 1> Cond) { 1864 if (Cond.size() != 1) 1865 return; 1866 if (!Cond[0].isReg()) 1867 return; 1868 1869 unsigned CondReg = Cond[0].getReg(); 1870 for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) { 1871 (*UI).setIsKill(false); 1872 } 1873 } 1874 1875 void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, 1876 MachineBasicBlock *MergeBB, 1877 unsigned BBSelectReg) { 1878 MachineBasicBlock *TrueBB = nullptr; 1879 MachineBasicBlock *FalseBB = nullptr; 1880 SmallVector<MachineOperand, 1> Cond; 1881 MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB]; 1882 TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond); 1883 1884 const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator()); 1885 1886 if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) { 1887 // This is an exit block, hence no successors. We will assign the 1888 // bb select register to the entry block. 1889 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1890 BBSelectReg, 1891 CodeBB->getParent()->begin()->getNumber()); 1892 insertUnconditionalBranch(CodeBB, MergeBB, DL); 1893 return; 1894 } 1895 1896 if (FalseBB == nullptr && TrueBB == nullptr) { 1897 TrueBB = FallthroughBB; 1898 } else if (TrueBB != nullptr) { 1899 FalseBB = 1900 (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB; 1901 } 1902 1903 if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) { 1904 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1905 BBSelectReg, TrueBB->getNumber()); 1906 } else { 1907 const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg); 1908 unsigned TrueBBReg = MRI->createVirtualRegister(RegClass); 1909 unsigned FalseBBReg = MRI->createVirtualRegister(RegClass); 1910 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1911 TrueBBReg, TrueBB->getNumber()); 1912 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1913 FalseBBReg, FalseBB->getNumber()); 1914 ensureCondIsNotKilled(Cond); 1915 TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL, 1916 BBSelectReg, Cond, TrueBBReg, FalseBBReg); 1917 } 1918 1919 insertUnconditionalBranch(CodeBB, MergeBB, DL); 1920 } 1921 1922 MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) { 1923 if (MRI->def_begin(Reg) == MRI->def_end()) { 1924 DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1925 << " has NO defs\n"); 1926 } else if (!MRI->hasOneDef(Reg)) { 1927 DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1928 << " has multiple defs\n"); 1929 DEBUG(dbgs() << "DEFS BEGIN:\n"); 1930 for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { 1931 DEBUG(DI->getParent()->dump()); 1932 } 1933 DEBUG(dbgs() << "DEFS END\n"); 1934 } 1935 1936 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); 1937 return (*(MRI->def_begin(Reg))).getParent(); 1938 } 1939 1940 void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, 1941 MachineBasicBlock *CodeBB, 1942 MachineBasicBlock *MergeBB, 1943 LinearizedRegion *InnerRegion, 1944 unsigned DestReg, 1945 unsigned SourceReg) { 1946 // In this function we know we are part of a chain already, so we need 1947 // to add the registers to the existing chain, and rename the register 1948 // inside the region. 1949 bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); 1950 MachineInstr *DefInstr = getDefInstr(SourceReg); 1951 if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) { 1952 // Handle the case where the def is a PHI-def inside a basic 1953 // block, then we only need to do renaming. Special care needs to 1954 // be taken if the PHI-def is part of an existing chain, or if a 1955 // new one needs to be created. 1956 InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI); 1957 1958 // We collect all PHI Information, and if we are at the region entry, 1959 // all PHIs will be removed, and then re-introduced if needed. 1960 storePHILinearizationInfoDest(DestReg, *DefInstr); 1961 // We have picked up all the information we need now and can remove 1962 // the PHI 1963 PHIInfo.removeSource(DestReg, SourceReg, CodeBB); 1964 DefInstr->eraseFromParent(); 1965 } else { 1966 // If this is not a phi-def, or it is a phi-def but from a linearized region 1967 if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) { 1968 // If this is a single BB and the definition is in this block we 1969 // need to replace any uses outside the region. 1970 InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI); 1971 } 1972 const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg); 1973 unsigned NextDestReg = MRI->createVirtualRegister(RegClass); 1974 bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1; 1975 DEBUG(dbgs() << "Insert Chained PHI\n"); 1976 insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg, 1977 SourceReg, IsLastDef); 1978 1979 PHIInfo.removeSource(DestReg, SourceReg, CodeBB); 1980 if (IsLastDef) { 1981 const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator()); 1982 TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL, 1983 NextDestReg, 0); 1984 PHIInfo.deleteDef(DestReg); 1985 } else { 1986 PHIInfo.replaceDef(DestReg, NextDestReg); 1987 } 1988 } 1989 } 1990 1991 bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB, 1992 LinearizedRegion *InnerRegion, 1993 unsigned Register) { 1994 return getDefInstr(Register)->getParent() == MBB || 1995 InnerRegion->contains(getDefInstr(Register)->getParent()); 1996 } 1997 1998 void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, 1999 MachineBasicBlock *CodeBB, 2000 MachineBasicBlock *MergeBB, 2001 LinearizedRegion *InnerRegion, 2002 LinearizedRegion *LRegion) { 2003 DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts(); 2004 SmallVector<unsigned, 4> OldLiveOuts; 2005 bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); 2006 for (auto OLI : *LiveOuts) { 2007 OldLiveOuts.push_back(OLI); 2008 } 2009 2010 for (auto LI : OldLiveOuts) { 2011 DEBUG(dbgs() << "LiveOut: " << PrintReg(LI, TRI)); 2012 if (!containsDef(CodeBB, InnerRegion, LI) || 2013 (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) { 2014 // If the register simly lives through the CodeBB, we don't have 2015 // to rewrite anything since the register is not defined in this 2016 // part of the code. 2017 DEBUG(dbgs() << "- through"); 2018 continue; 2019 } 2020 DEBUG(dbgs() << "\n"); 2021 unsigned Reg = LI; 2022 if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) { 2023 // If the register is live out, we do want to create a phi, 2024 // unless it is from the Exit block, becasuse in that case there 2025 // is already a PHI, and no need to create a new one. 2026 2027 // If the register is just a live out def and not part of a phi 2028 // chain, we need to create a PHI node to handle the if region, 2029 // and replace all uses outside of the region with the new dest 2030 // register, unless it is the outgoing BB select register. We have 2031 // already creaed phi nodes for these. 2032 const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); 2033 unsigned PHIDestReg = MRI->createVirtualRegister(RegClass); 2034 unsigned IfSourceReg = MRI->createVirtualRegister(RegClass); 2035 // Create initializer, this value is never used, but is needed 2036 // to satisfy SSA. 2037 DEBUG(dbgs() << "Initializer for reg: " << PrintReg(Reg) << "\n"); 2038 TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(), 2039 IfSourceReg, 0); 2040 2041 InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI); 2042 DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n"); 2043 insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg, 2044 IfSourceReg, Reg, true); 2045 } 2046 } 2047 2048 // Handle the chained definitions in PHIInfo, checking if this basic block 2049 // is a source block for a definition. 2050 SmallVector<unsigned, 4> Sources; 2051 if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) { 2052 DEBUG(dbgs() << "Inserting PHI Live Out from BB#" << CodeBB->getNumber() 2053 << "\n"); 2054 for (auto SI : Sources) { 2055 unsigned DestReg; 2056 PHIInfo.findDest(SI, CodeBB, DestReg); 2057 insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI); 2058 } 2059 DEBUG(dbgs() << "Insertion done.\n"); 2060 } 2061 2062 DEBUG(PHIInfo.dump(MRI)); 2063 } 2064 2065 void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) { 2066 DEBUG(dbgs() << "Before PHI Prune\n"); 2067 DEBUG(PHIInfo.dump(MRI)); 2068 SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4> 2069 ElimiatedSources; 2070 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2071 ++DRI) { 2072 2073 unsigned DestReg = *DRI; 2074 auto SE = PHIInfo.sources_end(DestReg); 2075 2076 bool MBBContainsPHISource = false; 2077 // Check if there is a PHI source in this MBB 2078 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2079 unsigned SourceReg = (*SRI).first; 2080 MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); 2081 if (Def->getParent()->getParent() == MBB) { 2082 MBBContainsPHISource = true; 2083 } 2084 } 2085 2086 // If so, all other sources are useless since we know this block 2087 // is always executed when the region is executed. 2088 if (MBBContainsPHISource) { 2089 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2090 PHILinearize::PHISourceT Source = *SRI; 2091 unsigned SourceReg = Source.first; 2092 MachineBasicBlock *SourceMBB = Source.second; 2093 MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); 2094 if (Def->getParent()->getParent() != MBB) { 2095 ElimiatedSources.push_back( 2096 std::make_tuple(DestReg, SourceReg, SourceMBB)); 2097 } 2098 } 2099 } 2100 } 2101 2102 // Remove the PHI sources that are in the given MBB 2103 for (auto &SourceInfo : ElimiatedSources) { 2104 PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo), 2105 std::get<2>(SourceInfo)); 2106 } 2107 DEBUG(dbgs() << "After PHI Prune\n"); 2108 DEBUG(PHIInfo.dump(MRI)); 2109 } 2110 2111 void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion, 2112 unsigned DestReg) { 2113 MachineBasicBlock *Entry = CurrentRegion->getEntry(); 2114 MachineBasicBlock *Exit = CurrentRegion->getExit(); 2115 2116 DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() 2117 << " Pred: " << (*(Entry->pred_begin()))->getNumber() << "\n"); 2118 2119 int NumSources = 0; 2120 auto SE = PHIInfo.sources_end(DestReg); 2121 2122 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2123 NumSources++; 2124 } 2125 2126 if (NumSources == 1) { 2127 auto SRI = PHIInfo.sources_begin(DestReg); 2128 unsigned SourceReg = (*SRI).first; 2129 replaceRegisterWith(DestReg, SourceReg); 2130 } else { 2131 const DebugLoc &DL = Entry->findDebugLoc(Entry->begin()); 2132 MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL, 2133 TII->get(TargetOpcode::PHI), DestReg); 2134 DEBUG(dbgs() << "Entry PHI " << PrintReg(DestReg, TRI) << "<def> = PHI("); 2135 2136 unsigned CurrentBackedgeReg = 0; 2137 2138 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2139 unsigned SourceReg = (*SRI).first; 2140 2141 if (CurrentRegion->contains((*SRI).second)) { 2142 if (CurrentBackedgeReg == 0) { 2143 CurrentBackedgeReg = SourceReg; 2144 } else { 2145 MachineInstr *PHIDefInstr = getDefInstr(SourceReg); 2146 MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent(); 2147 const TargetRegisterClass *RegClass = 2148 MRI->getRegClass(CurrentBackedgeReg); 2149 unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass); 2150 MachineInstrBuilder BackedgePHI = 2151 BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL, 2152 TII->get(TargetOpcode::PHI), NewBackedgeReg); 2153 BackedgePHI.addReg(CurrentBackedgeReg); 2154 BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0)); 2155 BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1)); 2156 BackedgePHI.addMBB((*SRI).second); 2157 CurrentBackedgeReg = NewBackedgeReg; 2158 DEBUG(dbgs() << "Inserting backedge PHI: " 2159 << PrintReg(NewBackedgeReg, TRI) << "<def> = PHI(" 2160 << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" 2161 << getPHIPred(*PHIDefInstr, 0)->getNumber() << ", " 2162 << PrintReg(getPHISourceReg(*PHIDefInstr, 1), TRI) 2163 << ", BB#" << (*SRI).second->getNumber()); 2164 } 2165 } else { 2166 MIB.addReg(SourceReg); 2167 MIB.addMBB((*SRI).second); 2168 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 2169 << (*SRI).second->getNumber() << ", "); 2170 } 2171 } 2172 2173 // Add the final backedge register source to the entry phi 2174 if (CurrentBackedgeReg != 0) { 2175 MIB.addReg(CurrentBackedgeReg); 2176 MIB.addMBB(Exit); 2177 DEBUG(dbgs() << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" 2178 << Exit->getNumber() << ")\n"); 2179 } else { 2180 DEBUG(dbgs() << ")\n"); 2181 } 2182 } 2183 } 2184 2185 void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) { 2186 DEBUG(PHIInfo.dump(MRI)); 2187 2188 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2189 ++DRI) { 2190 2191 unsigned DestReg = *DRI; 2192 createEntryPHI(CurrentRegion, DestReg); 2193 } 2194 PHIInfo.clear(); 2195 } 2196 2197 void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register, 2198 unsigned NewRegister) { 2199 assert(Register != NewRegister && "Cannot replace a reg with itself"); 2200 2201 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), 2202 E = MRI->reg_end(); 2203 I != E;) { 2204 MachineOperand &O = *I; 2205 ++I; 2206 if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { 2207 DEBUG(dbgs() << "Trying to substitute physical register: " 2208 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 2209 << "\n"); 2210 llvm_unreachable("Cannot substitute physical registers"); 2211 // We don't handle physical registers, but if we need to 2212 // in the future This is how we do it: 2213 // O.substPhysReg(NewRegister, *TRI); 2214 } else { 2215 DEBUG(dbgs() << "Replacing register: " 2216 << PrintReg(Register, MRI->getTargetRegisterInfo()) 2217 << " with " 2218 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 2219 << "\n"); 2220 O.setReg(NewRegister); 2221 } 2222 } 2223 PHIInfo.deleteDef(Register); 2224 2225 getRegionMRT()->replaceLiveOutReg(Register, NewRegister); 2226 2227 DEBUG(PHIInfo.dump(MRI)); 2228 } 2229 2230 void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) { 2231 DEBUG(dbgs() << "Resolve PHI Infos\n"); 2232 DEBUG(PHIInfo.dump(MRI)); 2233 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2234 ++DRI) { 2235 unsigned DestReg = *DRI; 2236 DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) << "\n"); 2237 auto SRI = PHIInfo.sources_begin(DestReg); 2238 unsigned SourceReg = (*SRI).first; 2239 DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) 2240 << " SourceReg: " << PrintReg(SourceReg, TRI) << "\n"); 2241 2242 assert(PHIInfo.sources_end(DestReg) == ++SRI && 2243 "More than one phi source in entry node"); 2244 replaceRegisterWith(DestReg, SourceReg); 2245 } 2246 } 2247 2248 static bool isFunctionEntryBlock(MachineBasicBlock *MBB) { 2249 return ((&(*(MBB->getParent()->begin()))) == MBB); 2250 } 2251 2252 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( 2253 MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB, 2254 LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn, 2255 unsigned BBSelectRegOut) { 2256 if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) { 2257 // Handle non-loop function entry block. 2258 // We need to allow loops to the entry block and then 2259 rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); 2260 resolvePHIInfos(CodeBB); 2261 removeExternalCFGSuccessors(CodeBB); 2262 CodeBB->addSuccessor(MergeBB); 2263 CurrentRegion->addMBB(CodeBB); 2264 return nullptr; 2265 } 2266 if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) { 2267 // Handle non-loop region entry block. 2268 MachineFunction *MF = MergeBB->getParent(); 2269 auto MergeIter = MergeBB->getIterator(); 2270 auto CodeBBStartIter = CodeBB->getIterator(); 2271 auto CodeBBEndIter = ++(CodeBB->getIterator()); 2272 if (CodeBBEndIter != MergeIter) { 2273 MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter); 2274 } 2275 rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); 2276 prunePHIInfo(CodeBB); 2277 createEntryPHIs(CurrentRegion); 2278 removeExternalCFGSuccessors(CodeBB); 2279 CodeBB->addSuccessor(MergeBB); 2280 CurrentRegion->addMBB(CodeBB); 2281 return nullptr; 2282 } else { 2283 // Handle internal block. 2284 const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn); 2285 unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass); 2286 rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg); 2287 bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB; 2288 MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB, 2289 BBSelectRegIn, IsRegionEntryBB); 2290 CurrentRegion->addMBB(IfBB); 2291 // If this is the entry block we need to make the If block the new 2292 // linearized region entry. 2293 if (IsRegionEntryBB) { 2294 CurrentRegion->setEntry(IfBB); 2295 2296 if (CurrentRegion->getHasLoop()) { 2297 MachineBasicBlock *RegionExit = CurrentRegion->getExit(); 2298 MachineBasicBlock *ETrueBB = nullptr; 2299 MachineBasicBlock *EFalseBB = nullptr; 2300 SmallVector<MachineOperand, 1> ECond; 2301 2302 const DebugLoc &DL = DebugLoc(); 2303 TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); 2304 TII->removeBranch(*RegionExit); 2305 2306 // We need to create a backedge if there is a loop 2307 unsigned Reg = TII->insertNE( 2308 RegionExit, RegionExit->instr_end(), DL, 2309 CurrentRegion->getRegionMRT()->getInnerOutputRegister(), 2310 CurrentRegion->getRegionMRT()->getEntry()->getNumber()); 2311 MachineOperand RegOp = 2312 MachineOperand::CreateReg(Reg, false, false, true); 2313 ArrayRef<MachineOperand> Cond(RegOp); 2314 DEBUG(dbgs() << "RegionExitReg: "); 2315 DEBUG(Cond[0].print(dbgs(), TRI)); 2316 DEBUG(dbgs() << "\n"); 2317 TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, 2318 Cond, DebugLoc()); 2319 RegionExit->addSuccessor(CurrentRegion->getEntry()); 2320 } 2321 } 2322 CurrentRegion->addMBB(CodeBB); 2323 LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo); 2324 2325 InnerRegion.setParent(CurrentRegion); 2326 DEBUG(dbgs() << "Insert BB Select PHI (BB)\n"); 2327 insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn, 2328 CodeBBSelectReg); 2329 InnerRegion.addMBB(MergeBB); 2330 2331 DEBUG(InnerRegion.print(dbgs(), TRI)); 2332 rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion); 2333 extractKilledPHIs(CodeBB); 2334 if (IsRegionEntryBB) { 2335 createEntryPHIs(CurrentRegion); 2336 } 2337 return IfBB; 2338 } 2339 } 2340 2341 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( 2342 MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion, 2343 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, 2344 unsigned BBSelectRegIn, unsigned BBSelectRegOut) { 2345 unsigned CodeBBSelectReg = 2346 InnerRegion->getRegionMRT()->getInnerOutputRegister(); 2347 MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry(); 2348 MachineBasicBlock *CodeExitBB = InnerRegion->getExit(); 2349 MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB, 2350 SelectBB, BBSelectRegIn, true); 2351 CurrentRegion->addMBB(IfBB); 2352 bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry(); 2353 if (isEntry) { 2354 2355 if (CurrentRegion->getHasLoop()) { 2356 MachineBasicBlock *RegionExit = CurrentRegion->getExit(); 2357 MachineBasicBlock *ETrueBB = nullptr; 2358 MachineBasicBlock *EFalseBB = nullptr; 2359 SmallVector<MachineOperand, 1> ECond; 2360 2361 const DebugLoc &DL = DebugLoc(); 2362 TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); 2363 TII->removeBranch(*RegionExit); 2364 2365 // We need to create a backedge if there is a loop 2366 unsigned Reg = 2367 TII->insertNE(RegionExit, RegionExit->instr_end(), DL, 2368 CurrentRegion->getRegionMRT()->getInnerOutputRegister(), 2369 CurrentRegion->getRegionMRT()->getEntry()->getNumber()); 2370 MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); 2371 ArrayRef<MachineOperand> Cond(RegOp); 2372 DEBUG(dbgs() << "RegionExitReg: "); 2373 DEBUG(Cond[0].print(dbgs(), TRI)); 2374 DEBUG(dbgs() << "\n"); 2375 TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, 2376 Cond, DebugLoc()); 2377 RegionExit->addSuccessor(IfBB); 2378 } 2379 } 2380 CurrentRegion->addMBBs(InnerRegion); 2381 DEBUG(dbgs() << "Insert BB Select PHI (region)\n"); 2382 insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn, 2383 CodeBBSelectReg); 2384 2385 rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion, 2386 CurrentRegion); 2387 2388 rewriteRegionEntryPHIs(InnerRegion, IfBB); 2389 2390 if (isEntry) { 2391 CurrentRegion->setEntry(IfBB); 2392 } 2393 2394 if (isEntry) { 2395 createEntryPHIs(CurrentRegion); 2396 } 2397 2398 return IfBB; 2399 } 2400 2401 void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, 2402 MachineBasicBlock *Entry, 2403 MachineBasicBlock *EntrySucc, 2404 LinearizedRegion *LRegion) { 2405 SmallVector<unsigned, 2> PHIRegionIndices; 2406 getPHIRegionIndices(LRegion, PHI, PHIRegionIndices); 2407 2408 assert(PHIRegionIndices.size() == 1); 2409 2410 unsigned RegionIndex = PHIRegionIndices[0]; 2411 unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex); 2412 MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex); 2413 unsigned PHIDest = getPHIDestReg(PHI); 2414 unsigned PHISource = PHIDest; 2415 unsigned ReplaceReg; 2416 2417 if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) { 2418 PHISource = ReplaceReg; 2419 } 2420 2421 const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest); 2422 unsigned NewDestReg = MRI->createVirtualRegister(RegClass); 2423 LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI); 2424 MachineInstrBuilder MIB = 2425 BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), 2426 TII->get(TargetOpcode::PHI), NewDestReg); 2427 DEBUG(dbgs() << "Split Entry PHI " << PrintReg(NewDestReg, TRI) 2428 << "<def> = PHI("); 2429 MIB.addReg(PHISource); 2430 MIB.addMBB(Entry); 2431 DEBUG(dbgs() << PrintReg(PHISource, TRI) << ", BB#" << Entry->getNumber()); 2432 MIB.addReg(RegionSourceReg); 2433 MIB.addMBB(RegionSourceMBB); 2434 DEBUG(dbgs() << " ," << PrintReg(RegionSourceReg, TRI) << ", BB#" 2435 << RegionSourceMBB->getNumber() << ")\n"); 2436 } 2437 2438 void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry, 2439 MachineBasicBlock *EntrySucc, 2440 LinearizedRegion *LRegion) { 2441 SmallVector<MachineInstr *, 2> PHIs; 2442 collectPHIs(Entry, PHIs); 2443 2444 for (auto PHII : PHIs) { 2445 splitLoopPHI(*PHII, Entry, EntrySucc, LRegion); 2446 } 2447 } 2448 2449 // Split the exit block so that we can insert a end control flow 2450 MachineBasicBlock * 2451 AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) { 2452 auto MRTRegion = LRegion->getRegionMRT(); 2453 auto Exit = LRegion->getExit(); 2454 auto MF = Exit->getParent(); 2455 auto Succ = MRTRegion->getSucc(); 2456 2457 auto NewExit = MF->CreateMachineBasicBlock(); 2458 auto AfterExitIter = Exit->getIterator(); 2459 AfterExitIter++; 2460 MF->insert(AfterExitIter, NewExit); 2461 Exit->removeSuccessor(Succ); 2462 Exit->addSuccessor(NewExit); 2463 NewExit->addSuccessor(Succ); 2464 insertUnconditionalBranch(NewExit, Succ); 2465 LRegion->addMBB(NewExit); 2466 LRegion->setExit(NewExit); 2467 2468 DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() << "\n"); 2469 2470 // Replace any PHI Predecessors in the successor with NewExit 2471 for (auto &II : *Succ) { 2472 MachineInstr &Instr = II; 2473 2474 // If we are past the PHI instructions we are done 2475 if (!Instr.isPHI()) 2476 break; 2477 2478 int numPreds = getPHINumInputs(Instr); 2479 for (int i = 0; i < numPreds; ++i) { 2480 auto Pred = getPHIPred(Instr, i); 2481 if (Pred == Exit) { 2482 setPhiPred(Instr, i, NewExit); 2483 } 2484 } 2485 } 2486 2487 return NewExit; 2488 } 2489 2490 2491 static MachineBasicBlock *split(MachineBasicBlock::iterator I) { 2492 // Create the fall-through block. 2493 MachineBasicBlock *MBB = (*I).getParent(); 2494 MachineFunction *MF = MBB->getParent(); 2495 MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock(); 2496 auto MBBIter = ++(MBB->getIterator()); 2497 MF->insert(MBBIter, SuccMBB); 2498 SuccMBB->transferSuccessorsAndUpdatePHIs(MBB); 2499 MBB->addSuccessor(SuccMBB); 2500 2501 // Splice the code over. 2502 SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end()); 2503 2504 return SuccMBB; 2505 } 2506 2507 // Split the entry block separating PHI-nodes and the rest of the code 2508 // This is needed to insert an initializer for the bb select register 2509 // inloop regions. 2510 2511 MachineBasicBlock * 2512 AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) { 2513 MachineBasicBlock *Entry = LRegion->getEntry(); 2514 MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI()); 2515 MachineBasicBlock *Exit = LRegion->getExit(); 2516 2517 DEBUG(dbgs() << "Split BB#" << Entry->getNumber() << " to BB#" 2518 << Entry->getNumber() << " -> BB#" << EntrySucc->getNumber() 2519 << "\n"); 2520 LRegion->addMBB(EntrySucc); 2521 2522 // Make the backedge go to Entry Succ 2523 if (Exit->isSuccessor(Entry)) { 2524 Exit->removeSuccessor(Entry); 2525 } 2526 Exit->addSuccessor(EntrySucc); 2527 MachineInstr &Branch = *(Exit->instr_rbegin()); 2528 for (auto &UI : Branch.uses()) { 2529 if (UI.isMBB() && UI.getMBB() == Entry) { 2530 UI.setMBB(EntrySucc); 2531 } 2532 } 2533 2534 splitLoopPHIs(Entry, EntrySucc, LRegion); 2535 2536 return EntrySucc; 2537 } 2538 2539 LinearizedRegion * 2540 AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) { 2541 LinearizedRegion *LRegion = Region->getLinearizedRegion(); 2542 LRegion->initLiveOut(Region, MRI, TRI, PHIInfo); 2543 LRegion->setEntry(Region->getEntry()); 2544 return LRegion; 2545 } 2546 2547 static void removeOldExitPreds(RegionMRT *Region) { 2548 MachineBasicBlock *Exit = Region->getSucc(); 2549 if (Exit == nullptr) { 2550 return; 2551 } 2552 for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(), 2553 E = Exit->pred_end(); 2554 PI != E; ++PI) { 2555 if (Region->contains(*PI)) { 2556 (*PI)->removeSuccessor(Exit); 2557 } 2558 } 2559 } 2560 2561 static bool mbbHasBackEdge(MachineBasicBlock *MBB, 2562 SmallPtrSet<MachineBasicBlock *, 8> &MBBs) { 2563 for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { 2564 if (MBBs.count(*SI) != 0) { 2565 return true; 2566 } 2567 } 2568 return false; 2569 } 2570 2571 static bool containsNewBackedge(MRT *Tree, 2572 SmallPtrSet<MachineBasicBlock *, 8> &MBBs) { 2573 // Need to traverse this in reverse since it is in post order. 2574 if (Tree == nullptr) 2575 return false; 2576 2577 if (Tree->isMBB()) { 2578 MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB(); 2579 MBBs.insert(MBB); 2580 if (mbbHasBackEdge(MBB, MBBs)) { 2581 return true; 2582 } 2583 } else { 2584 RegionMRT *Region = Tree->getRegionMRT(); 2585 SetVector<MRT *> *Children = Region->getChildren(); 2586 for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) { 2587 if (containsNewBackedge(*CI, MBBs)) 2588 return true; 2589 } 2590 } 2591 return false; 2592 } 2593 2594 static bool containsNewBackedge(RegionMRT *Region) { 2595 SmallPtrSet<MachineBasicBlock *, 8> MBBs; 2596 return containsNewBackedge(Region, MBBs); 2597 } 2598 2599 bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { 2600 auto *LRegion = initLinearizedRegion(Region); 2601 LRegion->setHasLoop(containsNewBackedge(Region)); 2602 MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region); 2603 MachineBasicBlock *CurrentMerge = LastMerge; 2604 LRegion->addMBB(LastMerge); 2605 LRegion->setExit(LastMerge); 2606 2607 rewriteRegionExitPHIs(Region, LastMerge, LRegion); 2608 removeOldExitPreds(Region); 2609 2610 DEBUG(PHIInfo.dump(MRI)); 2611 2612 SetVector<MRT *> *Children = Region->getChildren(); 2613 DEBUG(dbgs() << "===========If Region Start===============\n"); 2614 if (LRegion->getHasLoop()) { 2615 DEBUG(dbgs() << "Has Backedge: Yes\n"); 2616 } else { 2617 DEBUG(dbgs() << "Has Backedge: No\n"); 2618 } 2619 2620 unsigned BBSelectRegIn; 2621 unsigned BBSelectRegOut; 2622 for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) { 2623 DEBUG(dbgs() << "CurrentRegion: \n"); 2624 DEBUG(LRegion->print(dbgs(), TRI)); 2625 2626 auto CNI = CI; 2627 ++CNI; 2628 2629 MRT *Child = (*CI); 2630 2631 if (Child->isRegion()) { 2632 2633 LinearizedRegion *InnerLRegion = 2634 Child->getRegionMRT()->getLinearizedRegion(); 2635 // We found the block is the exit of an inner region, we need 2636 // to put it in the current linearized region. 2637 2638 DEBUG(dbgs() << "Linearizing region: "); 2639 DEBUG(InnerLRegion->print(dbgs(), TRI)); 2640 DEBUG(dbgs() << "\n"); 2641 2642 MachineBasicBlock *InnerEntry = InnerLRegion->getEntry(); 2643 if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) { 2644 // Entry has already been linearized, no need to do this region. 2645 unsigned OuterSelect = InnerLRegion->getBBSelectRegOut(); 2646 unsigned InnerSelectReg = 2647 InnerLRegion->getRegionMRT()->getInnerOutputRegister(); 2648 replaceRegisterWith(InnerSelectReg, OuterSelect), 2649 resolvePHIInfos(InnerEntry); 2650 if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge)) 2651 InnerLRegion->getExit()->addSuccessor(CurrentMerge); 2652 continue; 2653 } 2654 2655 BBSelectRegOut = Child->getBBSelectRegOut(); 2656 BBSelectRegIn = Child->getBBSelectRegIn(); 2657 2658 DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) 2659 << "\n"); 2660 DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) 2661 << "\n"); 2662 2663 MachineBasicBlock *IfEnd = CurrentMerge; 2664 CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion, 2665 Child->getRegionMRT()->getEntry(), 2666 BBSelectRegIn, BBSelectRegOut); 2667 TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); 2668 } else { 2669 MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB(); 2670 DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n"); 2671 2672 if (MBB == getSingleExitNode(*(MBB->getParent()))) { 2673 // If this is the exit block then we need to skip to the next. 2674 // The "in" register will be transferred to "out" in the next 2675 // iteration. 2676 continue; 2677 } 2678 2679 BBSelectRegOut = Child->getBBSelectRegOut(); 2680 BBSelectRegIn = Child->getBBSelectRegIn(); 2681 2682 DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) 2683 << "\n"); 2684 DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) 2685 << "\n"); 2686 2687 MachineBasicBlock *IfEnd = CurrentMerge; 2688 // This is a basic block that is not part of an inner region, we 2689 // need to put it in the current linearized region. 2690 CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn, 2691 BBSelectRegOut); 2692 if (CurrentMerge) { 2693 TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); 2694 } 2695 2696 DEBUG(PHIInfo.dump(MRI)); 2697 } 2698 } 2699 2700 LRegion->removeFalseRegisterKills(MRI); 2701 2702 if (LRegion->getHasLoop()) { 2703 MachineBasicBlock *NewSucc = splitEntry(LRegion); 2704 if (isFunctionEntryBlock(LRegion->getEntry())) { 2705 resolvePHIInfos(LRegion->getEntry()); 2706 } 2707 const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI()); 2708 unsigned InReg = LRegion->getBBSelectRegIn(); 2709 unsigned InnerSelectReg = 2710 MRI->createVirtualRegister(MRI->getRegClass(InReg)); 2711 unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); 2712 TII->materializeImmediate(*(LRegion->getEntry()), 2713 LRegion->getEntry()->getFirstTerminator(), DL, 2714 NewInReg, Region->getEntry()->getNumber()); 2715 // Need to be careful about updating the registers inside the region. 2716 LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI); 2717 DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n"); 2718 insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc, 2719 InnerSelectReg, NewInReg, 2720 LRegion->getRegionMRT()->getInnerOutputRegister()); 2721 splitExit(LRegion); 2722 TII->convertNonUniformLoopRegion(NewSucc, LastMerge); 2723 } 2724 2725 if (Region->isRoot()) { 2726 TII->insertReturn(*LastMerge); 2727 } 2728 2729 DEBUG(Region->getEntry()->getParent()->dump()); 2730 DEBUG(LRegion->print(dbgs(), TRI)); 2731 DEBUG(PHIInfo.dump(MRI)); 2732 2733 DEBUG(dbgs() << "===========If Region End===============\n"); 2734 2735 Region->setLinearizedRegion(LRegion); 2736 return true; 2737 } 2738 2739 bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) { 2740 if (false && regionIsSimpleIf(Region)) { 2741 transformSimpleIfRegion(Region); 2742 return true; 2743 } else if (regionIsSequence(Region)) { 2744 fixupRegionExits(Region); 2745 return false; 2746 } else { 2747 structurizeComplexRegion(Region); 2748 } 2749 return false; 2750 } 2751 2752 static int structurize_once = 0; 2753 2754 bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region, 2755 bool isTopRegion) { 2756 bool Changed = false; 2757 2758 auto Children = Region->getChildren(); 2759 for (auto CI : *Children) { 2760 if (CI->isRegion()) { 2761 Changed |= structurizeRegions(CI->getRegionMRT(), false); 2762 } 2763 } 2764 2765 if (structurize_once < 2 || true) { 2766 Changed |= structurizeRegion(Region); 2767 structurize_once++; 2768 } 2769 return Changed; 2770 } 2771 2772 void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) { 2773 DEBUG(dbgs() << "Fallthrough Map:\n"); 2774 for (auto &MBBI : MF) { 2775 MachineBasicBlock *MBB = MBBI.getFallThrough(); 2776 if (MBB != nullptr) { 2777 DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> " 2778 << MBB->getNumber() << "\n"); 2779 } 2780 FallthroughMap[&MBBI] = MBB; 2781 } 2782 } 2783 2784 void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region, 2785 unsigned SelectOut) { 2786 LinearizedRegion *LRegion = new LinearizedRegion(); 2787 if (SelectOut) { 2788 LRegion->addLiveOut(SelectOut); 2789 DEBUG(dbgs() << "Add LiveOut (BBSelect): " << PrintReg(SelectOut, TRI) 2790 << "\n"); 2791 } 2792 LRegion->setRegionMRT(Region); 2793 Region->setLinearizedRegion(LRegion); 2794 LRegion->setParent(Region->getParent() 2795 ? Region->getParent()->getLinearizedRegion() 2796 : nullptr); 2797 } 2798 2799 unsigned 2800 AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut, 2801 MachineRegisterInfo *MRI, 2802 const SIInstrInfo *TII) { 2803 if (MRT->isRegion()) { 2804 RegionMRT *Region = MRT->getRegionMRT(); 2805 Region->setBBSelectRegOut(SelectOut); 2806 unsigned InnerSelectOut = createBBSelectReg(TII, MRI); 2807 2808 // Fixme: Move linearization creation to the original spot 2809 createLinearizedRegion(Region, SelectOut); 2810 2811 for (auto CI = Region->getChildren()->begin(), 2812 CE = Region->getChildren()->end(); 2813 CI != CE; ++CI) { 2814 InnerSelectOut = 2815 initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII); 2816 } 2817 MRT->setBBSelectRegIn(InnerSelectOut); 2818 return InnerSelectOut; 2819 } else { 2820 MRT->setBBSelectRegOut(SelectOut); 2821 unsigned NewSelectIn = createBBSelectReg(TII, MRI); 2822 MRT->setBBSelectRegIn(NewSelectIn); 2823 return NewSelectIn; 2824 } 2825 } 2826 2827 static void checkRegOnlyPHIInputs(MachineFunction &MF) { 2828 for (auto &MBBI : MF) { 2829 for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(), 2830 E = MBBI.instr_end(); 2831 I != E; ++I) { 2832 MachineInstr &Instr = *I; 2833 if (Instr.isPHI()) { 2834 int numPreds = getPHINumInputs(Instr); 2835 for (int i = 0; i < numPreds; ++i) { 2836 assert(Instr.getOperand(i * 2 + 1).isReg() && 2837 "PHI Operand not a register"); 2838 } 2839 } 2840 } 2841 } 2842 } 2843 2844 2845 INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", 2846 "AMDGPU Machine CFG Structurizer", false, false) 2847 INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass) 2848 INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", 2849 "AMDGPU Machine CFG Structurizer", false, false) 2850 2851 char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID; 2852 2853 2854 bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { 2855 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 2856 const SIInstrInfo *TII = ST.getInstrInfo(); 2857 TRI = ST.getRegisterInfo(); 2858 MRI = &(MF.getRegInfo()); 2859 initFallthroughMap(MF); 2860 2861 checkRegOnlyPHIInputs(MF); 2862 DEBUG(dbgs() << "----STRUCTURIZER START----\n"); 2863 DEBUG(MF.dump()); 2864 2865 Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo()); 2866 DEBUG(Regions->dump()); 2867 2868 RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI); 2869 setRegionMRT(RTree); 2870 initializeSelectRegisters(RTree, 0, MRI, TII); 2871 DEBUG(RTree->dump(TRI)); 2872 bool result = structurizeRegions(RTree, true); 2873 delete RTree; 2874 DEBUG(dbgs() << "----STRUCTURIZER END----\n"); 2875 initFallthroughMap(MF); 2876 return result; 2877 } 2878 2879 FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() { 2880 return new AMDGPUMachineCFGStructurizer(); 2881 } 2882