1 //===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the machine instruction level CFG structurizer pass. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "SIInstrInfo.h" 16 #include "AMDGPUSubtarget.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/PostOrderIterator.h" 19 #include "llvm/ADT/SetVector.h" 20 #include "llvm/ADT/SmallPtrSet.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include "llvm/Analysis/CFG.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineFunctionPass.h" 25 #include "llvm/CodeGen/MachineInstr.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegionInfo.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/IR/DebugLoc.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Target/TargetInstrInfo.h" 33 #include "llvm/Target/TargetLowering.h" 34 #include "llvm/Target/TargetSubtargetInfo.h" 35 #include <tuple> 36 using namespace llvm; 37 38 #define DEBUG_TYPE "amdgpucfgstructurizer" 39 40 namespace { 41 class PHILinearizeDestIterator; 42 43 class PHILinearize { 44 friend class PHILinearizeDestIterator; 45 46 public: 47 typedef std::pair<unsigned, MachineBasicBlock *> PHISourceT; 48 49 private: 50 typedef DenseSet<PHISourceT> PHISourcesT; 51 typedef struct { 52 unsigned DestReg; 53 DebugLoc DL; 54 PHISourcesT Sources; 55 } PHIInfoElementT; 56 typedef SmallPtrSet<PHIInfoElementT *, 2> PHIInfoT; 57 PHIInfoT PHIInfo; 58 59 static unsigned phiInfoElementGetDest(PHIInfoElementT *Info); 60 static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef); 61 static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info); 62 static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg, 63 MachineBasicBlock *SourceMBB); 64 static void phiInfoElementRemoveSource(PHIInfoElementT *Info, 65 unsigned SourceReg, 66 MachineBasicBlock *SourceMBB); 67 PHIInfoElementT *findPHIInfoElement(unsigned DestReg); 68 PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg, 69 MachineBasicBlock *SourceMBB); 70 71 public: 72 bool findSourcesFromMBB(MachineBasicBlock *SourceMBB, 73 SmallVector<unsigned, 4> &Sources); 74 void addDest(unsigned DestReg, const DebugLoc &DL); 75 void replaceDef(unsigned OldDestReg, unsigned NewDestReg); 76 void deleteDef(unsigned DestReg); 77 void addSource(unsigned DestReg, unsigned SourceReg, 78 MachineBasicBlock *SourceMBB); 79 void removeSource(unsigned DestReg, unsigned SourceReg, 80 MachineBasicBlock *SourceMBB = nullptr); 81 bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, 82 unsigned &DestReg); 83 bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr); 84 unsigned getNumSources(unsigned DestReg); 85 void dump(MachineRegisterInfo *MRI); 86 void clear(); 87 88 typedef PHISourcesT::iterator source_iterator; 89 typedef PHILinearizeDestIterator dest_iterator; 90 91 dest_iterator dests_begin(); 92 dest_iterator dests_end(); 93 94 source_iterator sources_begin(unsigned Reg); 95 source_iterator sources_end(unsigned Reg); 96 }; 97 98 class PHILinearizeDestIterator { 99 private: 100 PHILinearize::PHIInfoT::iterator Iter; 101 102 public: 103 unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); } 104 PHILinearizeDestIterator &operator++() { 105 ++Iter; 106 return *this; 107 } 108 bool operator==(const PHILinearizeDestIterator &I) const { 109 return I.Iter == Iter; 110 } 111 bool operator!=(const PHILinearizeDestIterator &I) const { 112 return I.Iter != Iter; 113 } 114 115 PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {} 116 }; 117 118 unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) { 119 return Info->DestReg; 120 } 121 122 void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info, 123 unsigned NewDef) { 124 Info->DestReg = NewDef; 125 } 126 127 PHILinearize::PHISourcesT & 128 PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) { 129 return Info->Sources; 130 } 131 132 void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info, 133 unsigned SourceReg, 134 MachineBasicBlock *SourceMBB) { 135 // Assertion ensures we don't use the same SourceMBB for the 136 // sources, because we cannot have different registers with 137 // identical predecessors, but we can have the same register for 138 // multiple predecessors. 139 for (auto SI : phiInfoElementGetSources(Info)) { 140 assert((SI.second != SourceMBB || SourceReg == SI.first)); 141 } 142 143 phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB)); 144 } 145 146 void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info, 147 unsigned SourceReg, 148 MachineBasicBlock *SourceMBB) { 149 auto &Sources = phiInfoElementGetSources(Info); 150 SmallVector<PHISourceT, 4> ElimiatedSources; 151 for (auto SI : Sources) { 152 if (SI.first == SourceReg && 153 (SI.second == nullptr || SI.second == SourceMBB)) { 154 ElimiatedSources.push_back(PHISourceT(SI.first, SI.second)); 155 } 156 } 157 158 for (auto &Source : ElimiatedSources) { 159 Sources.erase(Source); 160 } 161 } 162 163 PHILinearize::PHIInfoElementT * 164 PHILinearize::findPHIInfoElement(unsigned DestReg) { 165 for (auto I : PHIInfo) { 166 if (phiInfoElementGetDest(I) == DestReg) { 167 return I; 168 } 169 } 170 return nullptr; 171 } 172 173 PHILinearize::PHIInfoElementT * 174 PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg, 175 MachineBasicBlock *SourceMBB) { 176 for (auto I : PHIInfo) { 177 for (auto SI : phiInfoElementGetSources(I)) { 178 if (SI.first == SourceReg && 179 (SI.second == nullptr || SI.second == SourceMBB)) { 180 return I; 181 } 182 } 183 } 184 return nullptr; 185 } 186 187 bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB, 188 SmallVector<unsigned, 4> &Sources) { 189 bool FoundSource = false; 190 for (auto I : PHIInfo) { 191 for (auto SI : phiInfoElementGetSources(I)) { 192 if (SI.second == SourceMBB) { 193 FoundSource = true; 194 Sources.push_back(SI.first); 195 } 196 } 197 } 198 return FoundSource; 199 } 200 201 void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) { 202 assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists"); 203 PHISourcesT EmptySet; 204 PHIInfoElementT *NewElement = new PHIInfoElementT(); 205 NewElement->DestReg = DestReg; 206 NewElement->DL = DL; 207 NewElement->Sources = EmptySet; 208 PHIInfo.insert(NewElement); 209 } 210 211 void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) { 212 phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg); 213 } 214 215 void PHILinearize::deleteDef(unsigned DestReg) { 216 PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg); 217 PHIInfo.erase(InfoElement); 218 delete InfoElement; 219 } 220 221 void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg, 222 MachineBasicBlock *SourceMBB) { 223 phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); 224 } 225 226 void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg, 227 MachineBasicBlock *SourceMBB) { 228 phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); 229 } 230 231 bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, 232 unsigned &DestReg) { 233 PHIInfoElementT *InfoElement = 234 findPHIInfoElementFromSource(SourceReg, SourceMBB); 235 if (InfoElement != nullptr) { 236 DestReg = phiInfoElementGetDest(InfoElement); 237 return true; 238 } 239 return false; 240 } 241 242 bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) { 243 unsigned DestReg; 244 return findDest(Reg, SourceMBB, DestReg); 245 } 246 247 unsigned PHILinearize::getNumSources(unsigned DestReg) { 248 return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size(); 249 } 250 251 void PHILinearize::dump(MachineRegisterInfo *MRI) { 252 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); 253 dbgs() << "=PHIInfo Start=\n"; 254 for (auto PII : this->PHIInfo) { 255 PHIInfoElementT &Element = *PII; 256 dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI) 257 << " Sources: {"; 258 for (auto &SI : Element.Sources) { 259 dbgs() << PrintReg(SI.first, TRI) << "(BB#" 260 << SI.second->getNumber() << "),"; 261 } 262 dbgs() << "}\n"; 263 } 264 dbgs() << "=PHIInfo End=\n"; 265 } 266 267 void PHILinearize::clear() { PHIInfo = PHIInfoT(); } 268 269 PHILinearize::dest_iterator PHILinearize::dests_begin() { 270 return PHILinearizeDestIterator(PHIInfo.begin()); 271 } 272 273 PHILinearize::dest_iterator PHILinearize::dests_end() { 274 return PHILinearizeDestIterator(PHIInfo.end()); 275 } 276 277 PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) { 278 auto InfoElement = findPHIInfoElement(Reg); 279 return phiInfoElementGetSources(InfoElement).begin(); 280 } 281 PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) { 282 auto InfoElement = findPHIInfoElement(Reg); 283 return phiInfoElementGetSources(InfoElement).end(); 284 } 285 286 class RegionMRT; 287 class MBBMRT; 288 289 static unsigned getPHINumInputs(MachineInstr &PHI) { 290 assert(PHI.isPHI()); 291 return (PHI.getNumOperands() - 1) / 2; 292 } 293 294 static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) { 295 assert(PHI.isPHI()); 296 return PHI.getOperand(Index * 2 + 2).getMBB(); 297 } 298 299 static void setPhiPred(MachineInstr &PHI, unsigned Index, 300 MachineBasicBlock *NewPred) { 301 PHI.getOperand(Index * 2 + 2).setMBB(NewPred); 302 } 303 304 static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) { 305 assert(PHI.isPHI()); 306 return PHI.getOperand(Index * 2 + 1).getReg(); 307 } 308 309 static unsigned getPHIDestReg(MachineInstr &PHI) { 310 assert(PHI.isPHI()); 311 return PHI.getOperand(0).getReg(); 312 } 313 314 class LinearizedRegion { 315 protected: 316 MachineBasicBlock *Entry; 317 // The exit block is part of the region, and is the last 318 // merge block before exiting the region. 319 MachineBasicBlock *Exit; 320 DenseSet<unsigned> LiveOuts; 321 SmallPtrSet<MachineBasicBlock *, 1> MBBs; 322 bool HasLoop; 323 LinearizedRegion *Parent; 324 RegionMRT *RMRT; 325 326 void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, 327 MachineInstr *DefInstr, const MachineRegisterInfo *MRI, 328 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 329 330 void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, 331 MachineInstr *DefInstr, 332 const MachineRegisterInfo *MRI, 333 const TargetRegisterInfo *TRI, 334 PHILinearize &PHIInfo); 335 336 void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 337 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, 338 RegionMRT *TopRegion); 339 340 void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 341 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 342 343 void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI, 344 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, 345 RegionMRT *TopRegion = nullptr); 346 347 public: 348 void setRegionMRT(RegionMRT *Region) { RMRT = Region; } 349 350 RegionMRT *getRegionMRT() { return RMRT; } 351 352 void setParent(LinearizedRegion *P) { Parent = P; } 353 354 LinearizedRegion *getParent() { return Parent; } 355 356 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr); 357 358 void setBBSelectRegIn(unsigned Reg); 359 360 unsigned getBBSelectRegIn(); 361 362 void setBBSelectRegOut(unsigned Reg, bool IsLiveOut); 363 364 unsigned getBBSelectRegOut(); 365 366 void setHasLoop(bool Value); 367 368 bool getHasLoop(); 369 370 void addLiveOut(unsigned VReg); 371 372 void removeLiveOut(unsigned Reg); 373 374 void replaceLiveOut(unsigned OldReg, unsigned NewReg); 375 376 void replaceRegister(unsigned Register, unsigned NewRegister, 377 MachineRegisterInfo *MRI, bool ReplaceInside, 378 bool ReplaceOutside, bool IncludeLoopPHIs); 379 380 void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister, 381 bool IncludeLoopPHIs, 382 MachineRegisterInfo *MRI); 383 384 void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister, 385 bool IncludeLoopPHIs, 386 MachineRegisterInfo *MRI); 387 388 DenseSet<unsigned> *getLiveOuts(); 389 390 void setEntry(MachineBasicBlock *NewEntry); 391 392 MachineBasicBlock *getEntry(); 393 394 void setExit(MachineBasicBlock *NewExit); 395 396 MachineBasicBlock *getExit(); 397 398 void addMBB(MachineBasicBlock *MBB); 399 400 void addMBBs(LinearizedRegion *InnerRegion); 401 402 bool contains(MachineBasicBlock *MBB); 403 404 bool isLiveOut(unsigned Reg); 405 406 bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI); 407 408 void removeFalseRegisterKills(MachineRegisterInfo *MRI); 409 410 void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI, 411 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 412 413 LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 414 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 415 416 LinearizedRegion(); 417 418 ~LinearizedRegion(); 419 }; 420 421 class MRT { 422 protected: 423 RegionMRT *Parent; 424 unsigned BBSelectRegIn; 425 unsigned BBSelectRegOut; 426 427 public: 428 unsigned getBBSelectRegIn() { return BBSelectRegIn; } 429 430 unsigned getBBSelectRegOut() { return BBSelectRegOut; } 431 432 void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; } 433 434 void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; } 435 436 virtual RegionMRT *getRegionMRT() { return nullptr; } 437 438 virtual MBBMRT *getMBBMRT() { return nullptr; } 439 440 bool isRegion() { return getRegionMRT() != nullptr; } 441 442 bool isMBB() { return getMBBMRT() != nullptr; } 443 444 bool isRoot() { return Parent == nullptr; } 445 446 void setParent(RegionMRT *Region) { Parent = Region; } 447 448 RegionMRT *getParent() { return Parent; } 449 450 static MachineBasicBlock * 451 initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, 452 DenseMap<MachineRegion *, RegionMRT *> &RegionMap); 453 454 static RegionMRT *buildMRT(MachineFunction &MF, 455 const MachineRegionInfo *RegionInfo, 456 const SIInstrInfo *TII, 457 MachineRegisterInfo *MRI); 458 459 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0; 460 461 void dumpDepth(int depth) { 462 for (int i = depth; i > 0; --i) { 463 dbgs() << " "; 464 } 465 } 466 467 virtual ~MRT() {} 468 }; 469 470 class MBBMRT : public MRT { 471 MachineBasicBlock *MBB; 472 473 public: 474 virtual MBBMRT *getMBBMRT() { return this; } 475 476 MachineBasicBlock *getMBB() { return MBB; } 477 478 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { 479 dumpDepth(depth); 480 dbgs() << "MBB: " << getMBB()->getNumber(); 481 dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); 482 dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; 483 } 484 485 MBBMRT(MachineBasicBlock *BB) : MBB(BB) { 486 setParent(nullptr); 487 setBBSelectRegOut(0); 488 setBBSelectRegIn(0); 489 } 490 }; 491 492 class RegionMRT : public MRT { 493 protected: 494 MachineRegion *Region; 495 LinearizedRegion *LRegion; 496 MachineBasicBlock *Succ; 497 498 SetVector<MRT *> Children; 499 500 public: 501 virtual RegionMRT *getRegionMRT() { return this; } 502 503 void setLinearizedRegion(LinearizedRegion *LinearizeRegion) { 504 LRegion = LinearizeRegion; 505 } 506 507 LinearizedRegion *getLinearizedRegion() { return LRegion; } 508 509 MachineRegion *getMachineRegion() { return Region; } 510 511 unsigned getInnerOutputRegister() { 512 return (*(Children.begin()))->getBBSelectRegOut(); 513 } 514 515 void addChild(MRT *Tree) { Children.insert(Tree); } 516 517 SetVector<MRT *> *getChildren() { return &Children; } 518 519 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { 520 dumpDepth(depth); 521 dbgs() << "Region: " << (void *)Region; 522 dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); 523 dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; 524 525 dumpDepth(depth); 526 if (getSucc()) 527 dbgs() << "Succ: " << getSucc()->getNumber() << "\n"; 528 else 529 dbgs() << "Succ: none \n"; 530 for (auto MRTI : Children) { 531 MRTI->dump(TRI, depth + 1); 532 } 533 } 534 535 MRT *getEntryTree() { return Children.back(); } 536 537 MRT *getExitTree() { return Children.front(); } 538 539 MachineBasicBlock *getEntry() { 540 MRT *Tree = Children.back(); 541 return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry() 542 : Tree->getMBBMRT()->getMBB(); 543 } 544 545 MachineBasicBlock *getExit() { 546 MRT *Tree = Children.front(); 547 return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit() 548 : Tree->getMBBMRT()->getMBB(); 549 } 550 551 void setSucc(MachineBasicBlock *MBB) { Succ = MBB; } 552 553 MachineBasicBlock *getSucc() { return Succ; } 554 555 bool contains(MachineBasicBlock *MBB) { 556 for (auto CI : Children) { 557 if (CI->isMBB()) { 558 if (MBB == CI->getMBBMRT()->getMBB()) { 559 return true; 560 } 561 } else { 562 if (CI->getRegionMRT()->contains(MBB)) { 563 return true; 564 } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr && 565 CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) { 566 return true; 567 } 568 } 569 } 570 return false; 571 } 572 573 void replaceLiveOutReg(unsigned Register, unsigned NewRegister) { 574 LinearizedRegion *LRegion = getLinearizedRegion(); 575 LRegion->replaceLiveOut(Register, NewRegister); 576 for (auto &CI : Children) { 577 if (CI->isRegion()) { 578 CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister); 579 } 580 } 581 } 582 583 RegionMRT(MachineRegion *MachineRegion) 584 : Region(MachineRegion), LRegion(nullptr), Succ(nullptr) { 585 setParent(nullptr); 586 setBBSelectRegOut(0); 587 setBBSelectRegIn(0); 588 } 589 590 virtual ~RegionMRT() { 591 if (LRegion) { 592 delete LRegion; 593 } 594 595 for (auto CI : Children) { 596 delete &(*CI); 597 } 598 } 599 }; 600 601 static unsigned createBBSelectReg(const SIInstrInfo *TII, 602 MachineRegisterInfo *MRI) { 603 return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32)); 604 } 605 606 MachineBasicBlock * 607 MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, 608 DenseMap<MachineRegion *, RegionMRT *> &RegionMap) { 609 for (auto &MFI : MF) { 610 MachineBasicBlock *ExitMBB = &MFI; 611 if (ExitMBB->succ_size() == 0) { 612 return ExitMBB; 613 } 614 } 615 llvm_unreachable("CFG has no exit block"); 616 return nullptr; 617 } 618 619 RegionMRT *MRT::buildMRT(MachineFunction &MF, 620 const MachineRegionInfo *RegionInfo, 621 const SIInstrInfo *TII, MachineRegisterInfo *MRI) { 622 SmallPtrSet<MachineRegion *, 4> PlacedRegions; 623 DenseMap<MachineRegion *, RegionMRT *> RegionMap; 624 MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion(); 625 RegionMRT *Result = new RegionMRT(TopLevelRegion); 626 RegionMap[TopLevelRegion] = Result; 627 628 // Insert the exit block first, we need it to be the merge node 629 // for the top level region. 630 MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap); 631 632 unsigned BBSelectRegIn = createBBSelectReg(TII, MRI); 633 MBBMRT *ExitMRT = new MBBMRT(Exit); 634 RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT); 635 ExitMRT->setBBSelectRegIn(BBSelectRegIn); 636 637 for (auto MBBI : post_order(&(MF.front()))) { 638 MachineBasicBlock *MBB = &(*MBBI); 639 640 // Skip Exit since we already added it 641 if (MBB == Exit) { 642 continue; 643 } 644 645 DEBUG(dbgs() << "Visiting BB#" << MBB->getNumber() << "\n"); 646 MBBMRT *NewMBB = new MBBMRT(MBB); 647 MachineRegion *Region = RegionInfo->getRegionFor(MBB); 648 649 // Ensure we have the MRT region 650 if (RegionMap.count(Region) == 0) { 651 RegionMRT *NewMRTRegion = new RegionMRT(Region); 652 RegionMap[Region] = NewMRTRegion; 653 654 // Ensure all parents are in the RegionMap 655 MachineRegion *Parent = Region->getParent(); 656 while (RegionMap.count(Parent) == 0) { 657 RegionMRT *NewMRTParent = new RegionMRT(Parent); 658 NewMRTParent->addChild(NewMRTRegion); 659 NewMRTRegion->setParent(NewMRTParent); 660 RegionMap[Parent] = NewMRTParent; 661 NewMRTRegion = NewMRTParent; 662 Parent = Parent->getParent(); 663 } 664 RegionMap[Parent]->addChild(NewMRTRegion); 665 NewMRTRegion->setParent(RegionMap[Parent]); 666 } 667 668 // Add MBB to Region MRT 669 RegionMap[Region]->addChild(NewMBB); 670 NewMBB->setParent(RegionMap[Region]); 671 RegionMap[Region]->setSucc(Region->getExit()); 672 } 673 return Result; 674 } 675 676 void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, 677 MachineInstr *DefInstr, 678 const MachineRegisterInfo *MRI, 679 const TargetRegisterInfo *TRI, 680 PHILinearize &PHIInfo) { 681 if (TRI->isVirtualRegister(Reg)) { 682 DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); 683 // If this is a source register to a PHI we are chaining, it 684 // must be live out. 685 if (PHIInfo.isSource(Reg)) { 686 DEBUG(dbgs() << "Add LiveOut (PHI): " << PrintReg(Reg, TRI) << "\n"); 687 addLiveOut(Reg); 688 } else { 689 // If this is live out of the MBB 690 for (auto &UI : MRI->use_operands(Reg)) { 691 if (UI.getParent()->getParent() != MBB) { 692 DEBUG(dbgs() << "Add LiveOut (MBB BB#" << MBB->getNumber() 693 << "): " << PrintReg(Reg, TRI) << "\n"); 694 addLiveOut(Reg); 695 } else { 696 // If the use is in the same MBB we have to make sure 697 // it is after the def, otherwise it is live out in a loop 698 MachineInstr *UseInstr = UI.getParent(); 699 for (MachineBasicBlock::instr_iterator 700 MII = UseInstr->getIterator(), 701 MIE = UseInstr->getParent()->instr_end(); 702 MII != MIE; ++MII) { 703 if ((&(*MII)) == DefInstr) { 704 DEBUG(dbgs() << "Add LiveOut (Loop): " << PrintReg(Reg, TRI) 705 << "\n"); 706 addLiveOut(Reg); 707 } 708 } 709 } 710 } 711 } 712 } 713 } 714 715 void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, 716 MachineInstr *DefInstr, 717 const MachineRegisterInfo *MRI, 718 const TargetRegisterInfo *TRI, 719 PHILinearize &PHIInfo) { 720 if (TRI->isVirtualRegister(Reg)) { 721 DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); 722 for (auto &UI : MRI->use_operands(Reg)) { 723 if (!Region->contains(UI.getParent()->getParent())) { 724 DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region 725 << "): " << PrintReg(Reg, TRI) << "\n"); 726 addLiveOut(Reg); 727 } 728 } 729 } 730 } 731 732 void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, 733 const MachineRegisterInfo *MRI, 734 const TargetRegisterInfo *TRI, 735 PHILinearize &PHIInfo) { 736 DEBUG(dbgs() << "-Store Live Outs Begin (BB#" << MBB->getNumber() << ")-\n"); 737 for (auto &II : *MBB) { 738 for (auto &RI : II.defs()) { 739 storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo); 740 } 741 for (auto &IRI : II.implicit_operands()) { 742 if (IRI.isDef()) { 743 storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo); 744 } 745 } 746 } 747 748 // If we have a successor with a PHI, source coming from this MBB we have to 749 // add the register as live out 750 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 751 E = MBB->succ_end(); 752 SI != E; ++SI) { 753 for (auto &II : *(*SI)) { 754 if (II.isPHI()) { 755 MachineInstr &PHI = II; 756 int numPreds = getPHINumInputs(PHI); 757 for (int i = 0; i < numPreds; ++i) { 758 if (getPHIPred(PHI, i) == MBB) { 759 unsigned PHIReg = getPHISourceReg(PHI, i); 760 DEBUG(dbgs() << "Add LiveOut (PhiSource BB#" << MBB->getNumber() 761 << " -> BB#" << (*SI)->getNumber() 762 << "): " << PrintReg(PHIReg, TRI) << "\n"); 763 addLiveOut(PHIReg); 764 } 765 } 766 } 767 } 768 } 769 770 DEBUG(dbgs() << "-Store Live Outs Endn-\n"); 771 } 772 773 void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB, 774 const MachineRegisterInfo *MRI, 775 const TargetRegisterInfo *TRI, 776 PHILinearize &PHIInfo, 777 RegionMRT *TopRegion) { 778 for (auto &II : *MBB) { 779 for (auto &RI : II.defs()) { 780 storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI, 781 PHIInfo); 782 } 783 for (auto &IRI : II.implicit_operands()) { 784 if (IRI.isDef()) { 785 storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI, 786 TRI, PHIInfo); 787 } 788 } 789 } 790 } 791 792 void LinearizedRegion::storeLiveOuts(RegionMRT *Region, 793 const MachineRegisterInfo *MRI, 794 const TargetRegisterInfo *TRI, 795 PHILinearize &PHIInfo, 796 RegionMRT *CurrentTopRegion) { 797 MachineBasicBlock *Exit = Region->getSucc(); 798 799 RegionMRT *TopRegion = 800 CurrentTopRegion == nullptr ? Region : CurrentTopRegion; 801 802 // Check if exit is end of function, if so, no live outs. 803 if (Exit == nullptr) 804 return; 805 806 auto Children = Region->getChildren(); 807 for (auto CI : *Children) { 808 if (CI->isMBB()) { 809 auto MBB = CI->getMBBMRT()->getMBB(); 810 storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion); 811 } else { 812 LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion(); 813 // We should be limited to only store registers that are live out from the 814 // lineaized region 815 for (auto MBBI : SubRegion->MBBs) { 816 storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion); 817 } 818 } 819 } 820 821 if (CurrentTopRegion == nullptr) { 822 auto Succ = Region->getSucc(); 823 for (auto &II : *Succ) { 824 if (II.isPHI()) { 825 MachineInstr &PHI = II; 826 int numPreds = getPHINumInputs(PHI); 827 for (int i = 0; i < numPreds; ++i) { 828 if (Region->contains(getPHIPred(PHI, i))) { 829 unsigned PHIReg = getPHISourceReg(PHI, i); 830 DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region 831 << "): " << PrintReg(PHIReg, TRI) << "\n"); 832 addLiveOut(PHIReg); 833 } 834 } 835 } 836 } 837 } 838 } 839 840 void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { 841 OS << "Linearized Region {"; 842 bool IsFirst = true; 843 for (const auto &MBB : MBBs) { 844 if (IsFirst) { 845 IsFirst = false; 846 } else { 847 OS << " ,"; 848 } 849 OS << MBB->getNumber(); 850 } 851 OS << "} (" << Entry->getNumber() << ", " 852 << (Exit == nullptr ? -1 : Exit->getNumber()) 853 << "): In:" << PrintReg(getBBSelectRegIn(), TRI) 854 << " Out:" << PrintReg(getBBSelectRegOut(), TRI) << " {"; 855 for (auto &LI : LiveOuts) { 856 OS << PrintReg(LI, TRI) << " "; 857 } 858 OS << "} \n"; 859 } 860 861 unsigned LinearizedRegion::getBBSelectRegIn() { 862 return getRegionMRT()->getBBSelectRegIn(); 863 } 864 865 unsigned LinearizedRegion::getBBSelectRegOut() { 866 return getRegionMRT()->getBBSelectRegOut(); 867 } 868 869 void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; } 870 871 bool LinearizedRegion::getHasLoop() { return HasLoop; } 872 873 void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); } 874 875 void LinearizedRegion::removeLiveOut(unsigned Reg) { 876 if (isLiveOut(Reg)) 877 LiveOuts.erase(Reg); 878 } 879 880 void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) { 881 if (isLiveOut(OldReg)) { 882 removeLiveOut(OldReg); 883 addLiveOut(NewReg); 884 } 885 } 886 887 void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister, 888 MachineRegisterInfo *MRI, 889 bool ReplaceInside, bool ReplaceOutside, 890 bool IncludeLoopPHI) { 891 assert(Register != NewRegister && "Cannot replace a reg with itself"); 892 893 DEBUG(dbgs() << "Pepareing to replace register (region): " 894 << PrintReg(Register, MRI->getTargetRegisterInfo()) << " with " 895 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); 896 897 // If we are replacing outside, we also need to update the LiveOuts 898 if (ReplaceOutside && 899 (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) { 900 LinearizedRegion *Current = this; 901 while (Current != nullptr && Current->getEntry() != nullptr) { 902 DEBUG(dbgs() << "Region before register replace\n"); 903 DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); 904 Current->replaceLiveOut(Register, NewRegister); 905 DEBUG(dbgs() << "Region after register replace\n"); 906 DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); 907 Current = Current->getParent(); 908 } 909 } 910 911 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), 912 E = MRI->reg_end(); 913 I != E;) { 914 MachineOperand &O = *I; 915 ++I; 916 917 // We don't rewrite defs. 918 if (O.isDef()) 919 continue; 920 921 bool IsInside = contains(O.getParent()->getParent()); 922 bool IsLoopPHI = IsInside && (O.getParent()->isPHI() && 923 O.getParent()->getParent() == getEntry()); 924 bool ShouldReplace = (IsInside && ReplaceInside) || 925 (!IsInside && ReplaceOutside) || 926 (IncludeLoopPHI && IsLoopPHI); 927 if (ShouldReplace) { 928 929 if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { 930 DEBUG(dbgs() << "Trying to substitute physical register: " 931 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 932 << "\n"); 933 llvm_unreachable("Cannot substitute physical registers"); 934 } else { 935 DEBUG(dbgs() << "Replacing register (region): " 936 << PrintReg(Register, MRI->getTargetRegisterInfo()) 937 << " with " 938 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 939 << "\n"); 940 O.setReg(NewRegister); 941 } 942 } 943 } 944 } 945 946 void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register, 947 unsigned NewRegister, 948 bool IncludeLoopPHIs, 949 MachineRegisterInfo *MRI) { 950 replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs); 951 } 952 953 void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register, 954 unsigned NewRegister, 955 bool IncludeLoopPHIs, 956 MachineRegisterInfo *MRI) { 957 replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs); 958 } 959 960 DenseSet<unsigned> *LinearizedRegion::getLiveOuts() { return &LiveOuts; } 961 962 void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) { 963 Entry = NewEntry; 964 } 965 966 MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; } 967 968 void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; } 969 970 MachineBasicBlock *LinearizedRegion::getExit() { return Exit; } 971 972 void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); } 973 974 void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) { 975 for (const auto &MBB : InnerRegion->MBBs) { 976 addMBB(MBB); 977 } 978 } 979 980 bool LinearizedRegion::contains(MachineBasicBlock *MBB) { 981 return MBBs.count(MBB) == 1; 982 } 983 984 bool LinearizedRegion::isLiveOut(unsigned Reg) { 985 return LiveOuts.count(Reg) == 1; 986 } 987 988 bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) { 989 return MRI->def_begin(Reg) == MRI->def_end(); 990 } 991 992 // After the code has been structurized, what was flagged as kills 993 // before are no longer register kills. 994 void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { 995 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); 996 for (auto MBBI : MBBs) { 997 MachineBasicBlock *MBB = MBBI; 998 for (auto &II : *MBB) { 999 for (auto &RI : II.uses()) { 1000 if (RI.isReg()) { 1001 unsigned Reg = RI.getReg(); 1002 if (TRI->isVirtualRegister(Reg)) { 1003 if (hasNoDef(Reg, MRI)) 1004 continue; 1005 if (!MRI->hasOneDef(Reg)) { 1006 DEBUG(this->getEntry()->getParent()->dump()); 1007 DEBUG(dbgs() << PrintReg(Reg, TRI) << "\n"); 1008 } 1009 1010 if (MRI->def_begin(Reg) == MRI->def_end()) { 1011 DEBUG(dbgs() << "Register " 1012 << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1013 << " has NO defs\n"); 1014 } else if (!MRI->hasOneDef(Reg)) { 1015 DEBUG(dbgs() << "Register " 1016 << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1017 << " has multiple defs\n"); 1018 } 1019 1020 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); 1021 MachineOperand *Def = &(*(MRI->def_begin(Reg))); 1022 MachineOperand *UseOperand = &(RI); 1023 bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB; 1024 if (UseIsOutsideDefMBB && UseOperand->isKill()) { 1025 DEBUG(dbgs() << "Removing kill flag on register: " 1026 << PrintReg(Reg, TRI) << "\n"); 1027 UseOperand->setIsKill(false); 1028 } 1029 } 1030 } 1031 } 1032 } 1033 } 1034 } 1035 1036 void LinearizedRegion::initLiveOut(RegionMRT *Region, 1037 const MachineRegisterInfo *MRI, 1038 const TargetRegisterInfo *TRI, 1039 PHILinearize &PHIInfo) { 1040 storeLiveOuts(Region, MRI, TRI, PHIInfo); 1041 } 1042 1043 LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB, 1044 const MachineRegisterInfo *MRI, 1045 const TargetRegisterInfo *TRI, 1046 PHILinearize &PHIInfo) { 1047 setEntry(MBB); 1048 setExit(MBB); 1049 storeLiveOuts(MBB, MRI, TRI, PHIInfo); 1050 MBBs.insert(MBB); 1051 Parent = nullptr; 1052 } 1053 1054 LinearizedRegion::LinearizedRegion() { 1055 setEntry(nullptr); 1056 setExit(nullptr); 1057 Parent = nullptr; 1058 } 1059 1060 LinearizedRegion::~LinearizedRegion() {} 1061 1062 class AMDGPUMachineCFGStructurizer : public MachineFunctionPass { 1063 private: 1064 const MachineRegionInfo *Regions; 1065 const SIInstrInfo *TII; 1066 const TargetRegisterInfo *TRI; 1067 MachineRegisterInfo *MRI; 1068 unsigned BBSelectRegister; 1069 PHILinearize PHIInfo; 1070 DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap; 1071 1072 void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI, 1073 SmallVector<unsigned, 2> &RegionIndices); 1074 void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, 1075 SmallVector<unsigned, 2> &RegionIndices); 1076 void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, 1077 SmallVector<unsigned, 2> &PHINonRegionIndices); 1078 1079 void storePHILinearizationInfoDest( 1080 unsigned LDestReg, MachineInstr &PHI, 1081 SmallVector<unsigned, 2> *RegionIndices = nullptr); 1082 1083 unsigned storePHILinearizationInfo(MachineInstr &PHI, 1084 SmallVector<unsigned, 2> *RegionIndices); 1085 1086 void extractKilledPHIs(MachineBasicBlock *MBB); 1087 1088 bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices, 1089 unsigned *ReplaceReg); 1090 1091 bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1092 MachineBasicBlock *SourceMBB, 1093 SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg); 1094 1095 void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1096 MachineBasicBlock *LastMerge, 1097 SmallVector<unsigned, 2> &PHIRegionIndices); 1098 void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1099 MachineBasicBlock *IfMBB, 1100 SmallVector<unsigned, 2> &PHIRegionIndices); 1101 void replaceLiveOutRegs(MachineInstr &PHI, 1102 SmallVector<unsigned, 2> &PHIRegionIndices, 1103 unsigned CombinedSourceReg, 1104 LinearizedRegion *LRegion); 1105 void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge, 1106 MachineInstr &PHI, LinearizedRegion *LRegion); 1107 1108 void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge, 1109 LinearizedRegion *LRegion); 1110 void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB, 1111 MachineInstr &PHI); 1112 void rewriteRegionEntryPHIs(LinearizedRegion *Region, 1113 MachineBasicBlock *IfMBB); 1114 1115 bool regionIsSimpleIf(RegionMRT *Region); 1116 1117 void transformSimpleIfRegion(RegionMRT *Region); 1118 1119 void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II); 1120 1121 void insertUnconditionalBranch(MachineBasicBlock *MBB, 1122 MachineBasicBlock *Dest, 1123 const DebugLoc &DL = DebugLoc()); 1124 1125 MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region); 1126 1127 void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1128 MachineBasicBlock *MergeBB, unsigned DestRegister, 1129 unsigned IfSourceRegister, unsigned CodeSourceRegister, 1130 bool IsUndefIfSource = false); 1131 1132 MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB, 1133 MachineBasicBlock *CodeBBStart, 1134 MachineBasicBlock *CodeBBEnd, 1135 MachineBasicBlock *SelectBB, unsigned IfReg, 1136 bool InheritPreds); 1137 1138 void prunePHIInfo(MachineBasicBlock *MBB); 1139 void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg); 1140 1141 void createEntryPHIs(LinearizedRegion *CurrentRegion); 1142 void resolvePHIInfos(MachineBasicBlock *FunctionEntry); 1143 1144 void replaceRegisterWith(unsigned Register, unsigned NewRegister); 1145 1146 MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB, 1147 MachineBasicBlock *CodeBB, 1148 LinearizedRegion *LRegion, 1149 unsigned BBSelectRegIn, 1150 unsigned BBSelectRegOut); 1151 1152 MachineBasicBlock * 1153 createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion, 1154 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, 1155 unsigned BBSelectRegIn, unsigned BBSelectRegOut); 1156 void ensureCondIsNotKilled(SmallVector<MachineOperand, 1> Cond); 1157 1158 void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, 1159 MachineBasicBlock *MergeBB, 1160 unsigned BBSelectReg); 1161 1162 MachineInstr *getDefInstr(unsigned Reg); 1163 void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1164 MachineBasicBlock *MergeBB, 1165 LinearizedRegion *InnerRegion, unsigned DestReg, 1166 unsigned SourceReg); 1167 bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion, 1168 unsigned Register); 1169 void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1170 MachineBasicBlock *MergeBB, 1171 LinearizedRegion *InnerRegion, 1172 LinearizedRegion *LRegion); 1173 1174 void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry, 1175 MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion); 1176 void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc, 1177 LinearizedRegion *LRegion); 1178 1179 MachineBasicBlock *splitExit(LinearizedRegion *LRegion); 1180 1181 MachineBasicBlock *splitEntry(LinearizedRegion *LRegion); 1182 1183 LinearizedRegion *initLinearizedRegion(RegionMRT *Region); 1184 1185 bool structurizeComplexRegion(RegionMRT *Region); 1186 1187 bool structurizeRegion(RegionMRT *Region); 1188 1189 bool structurizeRegions(RegionMRT *Region, bool isTopRegion); 1190 1191 public: 1192 static char ID; 1193 1194 void getAnalysisUsage(AnalysisUsage &AU) const override { 1195 AU.addRequired<MachineRegionInfoPass>(); 1196 MachineFunctionPass::getAnalysisUsage(AU); 1197 } 1198 1199 AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) { 1200 initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); 1201 } 1202 1203 void initFallthroughMap(MachineFunction &MF); 1204 1205 void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut); 1206 1207 unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg, 1208 MachineRegisterInfo *MRI, 1209 const SIInstrInfo *TII); 1210 1211 RegionMRT *RMRT; 1212 void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; } 1213 1214 RegionMRT *getRegionMRT() { return RMRT; } 1215 1216 bool runOnMachineFunction(MachineFunction &MF) override; 1217 }; 1218 } 1219 1220 char AMDGPUMachineCFGStructurizer::ID = 0; 1221 1222 bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) { 1223 MachineBasicBlock *Entry = Region->getEntry(); 1224 MachineBasicBlock *Succ = Region->getSucc(); 1225 bool FoundBypass = false; 1226 bool FoundIf = false; 1227 1228 if (Entry->succ_size() != 2) { 1229 return false; 1230 } 1231 1232 for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), 1233 E = Entry->succ_end(); 1234 SI != E; ++SI) { 1235 MachineBasicBlock *Current = *SI; 1236 1237 if (Current == Succ) { 1238 FoundBypass = true; 1239 } else if ((Current->succ_size() == 1) && 1240 *(Current->succ_begin()) == Succ) { 1241 FoundIf = true; 1242 } 1243 } 1244 1245 return FoundIf && FoundBypass; 1246 } 1247 1248 void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) { 1249 MachineBasicBlock *Entry = Region->getEntry(); 1250 MachineBasicBlock *Exit = Region->getExit(); 1251 TII->convertNonUniformIfRegion(Entry, Exit); 1252 } 1253 1254 static void fixMBBTerminator(MachineBasicBlock *MBB) { 1255 1256 if (MBB->succ_size() == 1) { 1257 auto *Succ = *(MBB->succ_begin()); 1258 for (auto &TI : MBB->terminators()) { 1259 for (auto &UI : TI.uses()) { 1260 if (UI.isMBB() && UI.getMBB() != Succ) { 1261 UI.setMBB(Succ); 1262 } 1263 } 1264 } 1265 } 1266 } 1267 1268 static void fixRegionTerminator(RegionMRT *Region) { 1269 MachineBasicBlock *InternalSucc = nullptr; 1270 MachineBasicBlock *ExternalSucc = nullptr; 1271 LinearizedRegion *LRegion = Region->getLinearizedRegion(); 1272 auto Exit = LRegion->getExit(); 1273 1274 SmallPtrSet<MachineBasicBlock *, 2> Successors; 1275 for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(), 1276 SE = Exit->succ_end(); 1277 SI != SE; ++SI) { 1278 MachineBasicBlock *Succ = *SI; 1279 if (LRegion->contains(Succ)) { 1280 // Do not allow re-assign 1281 assert(InternalSucc == nullptr); 1282 InternalSucc = Succ; 1283 } else { 1284 // Do not allow re-assign 1285 assert(ExternalSucc == nullptr); 1286 ExternalSucc = Succ; 1287 } 1288 } 1289 1290 for (auto &TI : Exit->terminators()) { 1291 for (auto &UI : TI.uses()) { 1292 if (UI.isMBB()) { 1293 auto Target = UI.getMBB(); 1294 if (Target != InternalSucc && Target != ExternalSucc) { 1295 UI.setMBB(ExternalSucc); 1296 } 1297 } 1298 } 1299 } 1300 } 1301 1302 // If a region region is just a sequence of regions (and the exit 1303 // block in the case of the top level region), we can simply skip 1304 // linearizing it, because it is already linear 1305 bool regionIsSequence(RegionMRT *Region) { 1306 auto Children = Region->getChildren(); 1307 for (auto CI : *Children) { 1308 if (!CI->isRegion()) { 1309 if (CI->getMBBMRT()->getMBB()->succ_size() > 1) { 1310 return false; 1311 } 1312 } 1313 } 1314 return true; 1315 } 1316 1317 void fixupRegionExits(RegionMRT *Region) { 1318 auto Children = Region->getChildren(); 1319 for (auto CI : *Children) { 1320 if (!CI->isRegion()) { 1321 fixMBBTerminator(CI->getMBBMRT()->getMBB()); 1322 } else { 1323 fixRegionTerminator(CI->getRegionMRT()); 1324 } 1325 } 1326 } 1327 1328 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( 1329 RegionMRT *Region, MachineInstr &PHI, 1330 SmallVector<unsigned, 2> &PHIRegionIndices) { 1331 unsigned NumInputs = getPHINumInputs(PHI); 1332 for (unsigned i = 0; i < NumInputs; ++i) { 1333 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1334 if (Region->contains(Pred)) { 1335 PHIRegionIndices.push_back(i); 1336 } 1337 } 1338 } 1339 1340 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( 1341 LinearizedRegion *Region, MachineInstr &PHI, 1342 SmallVector<unsigned, 2> &PHIRegionIndices) { 1343 unsigned NumInputs = getPHINumInputs(PHI); 1344 for (unsigned i = 0; i < NumInputs; ++i) { 1345 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1346 if (Region->contains(Pred)) { 1347 PHIRegionIndices.push_back(i); 1348 } 1349 } 1350 } 1351 1352 void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices( 1353 LinearizedRegion *Region, MachineInstr &PHI, 1354 SmallVector<unsigned, 2> &PHINonRegionIndices) { 1355 unsigned NumInputs = getPHINumInputs(PHI); 1356 for (unsigned i = 0; i < NumInputs; ++i) { 1357 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1358 if (!Region->contains(Pred)) { 1359 PHINonRegionIndices.push_back(i); 1360 } 1361 } 1362 } 1363 1364 void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest( 1365 unsigned LDestReg, MachineInstr &PHI, 1366 SmallVector<unsigned, 2> *RegionIndices) { 1367 if (RegionIndices) { 1368 for (auto i : *RegionIndices) { 1369 PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); 1370 } 1371 } else { 1372 unsigned NumInputs = getPHINumInputs(PHI); 1373 for (unsigned i = 0; i < NumInputs; ++i) { 1374 PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); 1375 } 1376 } 1377 } 1378 1379 unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo( 1380 MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) { 1381 unsigned DestReg = getPHIDestReg(PHI); 1382 unsigned LinearizeDestReg = 1383 MRI->createVirtualRegister(MRI->getRegClass(DestReg)); 1384 PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc()); 1385 storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices); 1386 return LinearizeDestReg; 1387 } 1388 1389 void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) { 1390 // We need to create a new chain for the killed phi, but there is no 1391 // need to do the renaming outside or inside the block. 1392 SmallPtrSet<MachineInstr *, 2> PHIs; 1393 for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), 1394 E = MBB->instr_end(); 1395 I != E; ++I) { 1396 MachineInstr &Instr = *I; 1397 if (Instr.isPHI()) { 1398 unsigned PHIDestReg = getPHIDestReg(Instr); 1399 DEBUG(dbgs() << "Extractking killed phi:\n"); 1400 DEBUG(Instr.dump()); 1401 PHIs.insert(&Instr); 1402 PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc()); 1403 storePHILinearizationInfoDest(PHIDestReg, Instr); 1404 } 1405 } 1406 1407 for (auto PI : PHIs) { 1408 PI->eraseFromParent(); 1409 } 1410 } 1411 1412 static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices, 1413 unsigned Index) { 1414 for (auto i : PHIRegionIndices) { 1415 if (i == Index) 1416 return true; 1417 } 1418 return false; 1419 } 1420 1421 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, 1422 SmallVector<unsigned, 2> &PHIIndices, 1423 unsigned *ReplaceReg) { 1424 return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg); 1425 } 1426 1427 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, 1428 unsigned CombinedSourceReg, 1429 MachineBasicBlock *SourceMBB, 1430 SmallVector<unsigned, 2> &PHIIndices, 1431 unsigned *ReplaceReg) { 1432 DEBUG(dbgs() << "Shrink PHI: "); 1433 DEBUG(PHI.dump()); 1434 DEBUG(dbgs() << " to " << PrintReg(getPHIDestReg(PHI), TRI) 1435 << "<def> = PHI("); 1436 1437 bool Replaced = false; 1438 unsigned NumInputs = getPHINumInputs(PHI); 1439 int SingleExternalEntryIndex = -1; 1440 for (unsigned i = 0; i < NumInputs; ++i) { 1441 if (!isPHIRegionIndex(PHIIndices, i)) { 1442 if (SingleExternalEntryIndex == -1) { 1443 // Single entry 1444 SingleExternalEntryIndex = i; 1445 } else { 1446 // Multiple entries 1447 SingleExternalEntryIndex = -2; 1448 } 1449 } 1450 } 1451 1452 if (SingleExternalEntryIndex > -1) { 1453 *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex); 1454 // We should not rewrite the code, we should only pick up the single value 1455 // that represents the shrunk PHI. 1456 Replaced = true; 1457 } else { 1458 MachineBasicBlock *MBB = PHI.getParent(); 1459 MachineInstrBuilder MIB = 1460 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1461 getPHIDestReg(PHI)); 1462 if (SourceMBB) { 1463 MIB.addReg(CombinedSourceReg); 1464 MIB.addMBB(SourceMBB); 1465 DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" 1466 << SourceMBB->getNumber()); 1467 } 1468 1469 for (unsigned i = 0; i < NumInputs; ++i) { 1470 if (isPHIRegionIndex(PHIIndices, i)) { 1471 continue; 1472 } 1473 unsigned SourceReg = getPHISourceReg(PHI, i); 1474 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1475 MIB.addReg(SourceReg); 1476 MIB.addMBB(SourcePred); 1477 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 1478 << SourcePred->getNumber()); 1479 } 1480 DEBUG(dbgs() << ")\n"); 1481 } 1482 PHI.eraseFromParent(); 1483 return Replaced; 1484 } 1485 1486 void AMDGPUMachineCFGStructurizer::replacePHI( 1487 MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge, 1488 SmallVector<unsigned, 2> &PHIRegionIndices) { 1489 DEBUG(dbgs() << "Replace PHI: "); 1490 DEBUG(PHI.dump()); 1491 DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI) 1492 << "<def> = PHI("); 1493 1494 bool HasExternalEdge = false; 1495 unsigned NumInputs = getPHINumInputs(PHI); 1496 for (unsigned i = 0; i < NumInputs; ++i) { 1497 if (!isPHIRegionIndex(PHIRegionIndices, i)) { 1498 HasExternalEdge = true; 1499 } 1500 } 1501 1502 if (HasExternalEdge) { 1503 MachineBasicBlock *MBB = PHI.getParent(); 1504 MachineInstrBuilder MIB = 1505 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1506 getPHIDestReg(PHI)); 1507 MIB.addReg(CombinedSourceReg); 1508 MIB.addMBB(LastMerge); 1509 DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" 1510 << LastMerge->getNumber()); 1511 for (unsigned i = 0; i < NumInputs; ++i) { 1512 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1513 continue; 1514 } 1515 unsigned SourceReg = getPHISourceReg(PHI, i); 1516 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1517 MIB.addReg(SourceReg); 1518 MIB.addMBB(SourcePred); 1519 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 1520 << SourcePred->getNumber()); 1521 } 1522 DEBUG(dbgs() << ")\n"); 1523 } else { 1524 replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg); 1525 } 1526 PHI.eraseFromParent(); 1527 } 1528 1529 void AMDGPUMachineCFGStructurizer::replaceEntryPHI( 1530 MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB, 1531 SmallVector<unsigned, 2> &PHIRegionIndices) { 1532 1533 DEBUG(dbgs() << "Replace entry PHI: "); 1534 DEBUG(PHI.dump()); 1535 DEBUG(dbgs() << " with "); 1536 1537 unsigned NumInputs = getPHINumInputs(PHI); 1538 unsigned NumNonRegionInputs = NumInputs; 1539 for (unsigned i = 0; i < NumInputs; ++i) { 1540 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1541 NumNonRegionInputs--; 1542 } 1543 } 1544 1545 if (NumNonRegionInputs == 0) { 1546 auto DestReg = getPHIDestReg(PHI); 1547 replaceRegisterWith(DestReg, CombinedSourceReg); 1548 DEBUG(dbgs() << " register " << PrintReg(CombinedSourceReg, TRI) << "\n"); 1549 PHI.eraseFromParent(); 1550 } else { 1551 DEBUG(dbgs() << PrintReg(getPHIDestReg(PHI), TRI) << "<def> = PHI("); 1552 MachineBasicBlock *MBB = PHI.getParent(); 1553 MachineInstrBuilder MIB = 1554 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1555 getPHIDestReg(PHI)); 1556 MIB.addReg(CombinedSourceReg); 1557 MIB.addMBB(IfMBB); 1558 DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" 1559 << IfMBB->getNumber()); 1560 unsigned NumInputs = getPHINumInputs(PHI); 1561 for (unsigned i = 0; i < NumInputs; ++i) { 1562 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1563 continue; 1564 } 1565 unsigned SourceReg = getPHISourceReg(PHI, i); 1566 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1567 MIB.addReg(SourceReg); 1568 MIB.addMBB(SourcePred); 1569 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 1570 << SourcePred->getNumber()); 1571 } 1572 DEBUG(dbgs() << ")\n"); 1573 PHI.eraseFromParent(); 1574 } 1575 } 1576 1577 void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs( 1578 MachineInstr &PHI, SmallVector<unsigned, 2> &PHIRegionIndices, 1579 unsigned CombinedSourceReg, LinearizedRegion *LRegion) { 1580 bool WasLiveOut = false; 1581 for (auto PII : PHIRegionIndices) { 1582 unsigned Reg = getPHISourceReg(PHI, PII); 1583 if (LRegion->isLiveOut(Reg)) { 1584 bool IsDead = true; 1585 1586 // Check if register is live out of the basic block 1587 MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent(); 1588 for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) { 1589 if ((*UI).getParent()->getParent() != DefMBB) { 1590 IsDead = false; 1591 } 1592 } 1593 1594 DEBUG(dbgs() << "Register " << PrintReg(Reg, TRI) << " is " 1595 << (IsDead ? "dead" : "alive") << " after PHI replace\n"); 1596 if (IsDead) { 1597 LRegion->removeLiveOut(Reg); 1598 } 1599 WasLiveOut = true; 1600 } 1601 } 1602 1603 if (WasLiveOut) 1604 LRegion->addLiveOut(CombinedSourceReg); 1605 } 1606 1607 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region, 1608 MachineBasicBlock *LastMerge, 1609 MachineInstr &PHI, 1610 LinearizedRegion *LRegion) { 1611 SmallVector<unsigned, 2> PHIRegionIndices; 1612 getPHIRegionIndices(Region, PHI, PHIRegionIndices); 1613 unsigned LinearizedSourceReg = 1614 storePHILinearizationInfo(PHI, &PHIRegionIndices); 1615 1616 replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices); 1617 replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion); 1618 } 1619 1620 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region, 1621 MachineBasicBlock *IfMBB, 1622 MachineInstr &PHI) { 1623 SmallVector<unsigned, 2> PHINonRegionIndices; 1624 getPHINonRegionIndices(Region, PHI, PHINonRegionIndices); 1625 unsigned LinearizedSourceReg = 1626 storePHILinearizationInfo(PHI, &PHINonRegionIndices); 1627 replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices); 1628 } 1629 1630 static void collectPHIs(MachineBasicBlock *MBB, 1631 SmallVector<MachineInstr *, 2> &PHIs) { 1632 for (auto &BBI : *MBB) { 1633 if (BBI.isPHI()) { 1634 PHIs.push_back(&BBI); 1635 } 1636 } 1637 } 1638 1639 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region, 1640 MachineBasicBlock *LastMerge, 1641 LinearizedRegion *LRegion) { 1642 SmallVector<MachineInstr *, 2> PHIs; 1643 auto Exit = Region->getSucc(); 1644 if (Exit == nullptr) 1645 return; 1646 1647 collectPHIs(Exit, PHIs); 1648 1649 for (auto PHII : PHIs) { 1650 rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion); 1651 } 1652 } 1653 1654 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region, 1655 MachineBasicBlock *IfMBB) { 1656 SmallVector<MachineInstr *, 2> PHIs; 1657 auto Entry = Region->getEntry(); 1658 1659 collectPHIs(Entry, PHIs); 1660 1661 for (auto PHII : PHIs) { 1662 rewriteRegionEntryPHI(Region, IfMBB, *PHII); 1663 } 1664 } 1665 1666 void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB, 1667 MachineBasicBlock *Dest, 1668 const DebugLoc &DL) { 1669 DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber() 1670 << " -> " << Dest->getNumber() << "\n"); 1671 MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator(); 1672 bool HasTerminator = Terminator != MBB->instr_end(); 1673 if (HasTerminator) { 1674 TII->ReplaceTailWithBranchTo(Terminator, Dest); 1675 } 1676 if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) { 1677 TII->insertUnconditionalBranch(*MBB, Dest, DL); 1678 } 1679 } 1680 1681 static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) { 1682 MachineBasicBlock *result = nullptr; 1683 for (auto &MFI : MF) { 1684 if (MFI.succ_size() == 0) { 1685 if (result == nullptr) { 1686 result = &MFI; 1687 } else { 1688 return nullptr; 1689 } 1690 } 1691 } 1692 1693 return result; 1694 } 1695 1696 static bool hasOneExitNode(MachineFunction &MF) { 1697 return getSingleExitNode(MF) != nullptr; 1698 } 1699 1700 MachineBasicBlock * 1701 AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) { 1702 auto Exit = Region->getSucc(); 1703 1704 // If the exit is the end of the function, we just use the existing 1705 MachineFunction *MF = Region->getEntry()->getParent(); 1706 if (Exit == nullptr && hasOneExitNode(*MF)) { 1707 return &(*(--(Region->getEntry()->getParent()->end()))); 1708 } 1709 1710 MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock(); 1711 if (Exit == nullptr) { 1712 MachineFunction::iterator ExitIter = MF->end(); 1713 MF->insert(ExitIter, LastMerge); 1714 } else { 1715 MachineFunction::iterator ExitIter = Exit->getIterator(); 1716 MF->insert(ExitIter, LastMerge); 1717 LastMerge->addSuccessor(Exit); 1718 insertUnconditionalBranch(LastMerge, Exit); 1719 DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n"); 1720 } 1721 return LastMerge; 1722 } 1723 1724 void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB, 1725 MachineBasicBlock *CodeBB, 1726 MachineBasicBlock *MergeBB, 1727 unsigned DestRegister, 1728 unsigned IfSourceRegister, 1729 unsigned CodeSourceRegister, 1730 bool IsUndefIfSource) { 1731 // If this is the function exit block, we don't need a phi. 1732 if (MergeBB->succ_begin() == MergeBB->succ_end()) { 1733 return; 1734 } 1735 DEBUG(dbgs() << "Merge PHI (BB#" << MergeBB->getNumber() 1736 << "): " << PrintReg(DestRegister, TRI) << "<def> = PHI(" 1737 << PrintReg(IfSourceRegister, TRI) << ", BB#" 1738 << IfBB->getNumber() << PrintReg(CodeSourceRegister, TRI) 1739 << ", BB#" << CodeBB->getNumber() << ")\n"); 1740 const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin()); 1741 MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL, 1742 TII->get(TargetOpcode::PHI), DestRegister); 1743 if (IsUndefIfSource && false) { 1744 MIB.addReg(IfSourceRegister, RegState::Undef); 1745 } else { 1746 MIB.addReg(IfSourceRegister); 1747 } 1748 MIB.addMBB(IfBB); 1749 MIB.addReg(CodeSourceRegister); 1750 MIB.addMBB(CodeBB); 1751 } 1752 1753 static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) { 1754 for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), 1755 E = MBB->succ_end(); 1756 PI != E; ++PI) { 1757 if ((*PI) != MBB) { 1758 (MBB)->removeSuccessor(*PI); 1759 } 1760 } 1761 } 1762 1763 static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, 1764 MachineBasicBlock *EndMBB) { 1765 1766 // We have to check against the StartMBB successor becasuse a 1767 // structurized region with a loop will have the entry block split, 1768 // and the backedge will go to the entry successor. 1769 DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs; 1770 unsigned SuccSize = StartMBB->succ_size(); 1771 if (SuccSize > 0) { 1772 MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin()); 1773 for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(), 1774 E = EndMBB->succ_end(); 1775 PI != E; ++PI) { 1776 // Either we have a back-edge to the entry block, or a back-edge to the 1777 // succesor of the entry block since the block may be split. 1778 if ((*PI) != StartMBB && 1779 !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) { 1780 Succs.insert( 1781 std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI)); 1782 } 1783 } 1784 } 1785 1786 for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(), 1787 E = StartMBB->pred_end(); 1788 PI != E; ++PI) { 1789 if ((*PI) != EndMBB) { 1790 Succs.insert( 1791 std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB)); 1792 } 1793 } 1794 1795 for (auto SI : Succs) { 1796 std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI; 1797 DEBUG(dbgs() << "Removing edge: BB#" << Edge.first->getNumber() << " -> BB#" 1798 << Edge.second->getNumber() << "\n"); 1799 Edge.first->removeSuccessor(Edge.second); 1800 } 1801 } 1802 1803 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock( 1804 MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart, 1805 MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg, 1806 bool InheritPreds) { 1807 MachineFunction *MF = MergeBB->getParent(); 1808 MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock(); 1809 1810 if (InheritPreds) { 1811 for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(), 1812 E = CodeBBStart->pred_end(); 1813 PI != E; ++PI) { 1814 if ((*PI) != CodeBBEnd) { 1815 MachineBasicBlock *Pred = (*PI); 1816 Pred->addSuccessor(IfBB); 1817 } 1818 } 1819 } 1820 1821 removeExternalCFGEdges(CodeBBStart, CodeBBEnd); 1822 1823 auto CodeBBStartI = CodeBBStart->getIterator(); 1824 auto CodeBBEndI = CodeBBEnd->getIterator(); 1825 auto MergeIter = MergeBB->getIterator(); 1826 MF->insert(MergeIter, IfBB); 1827 MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI); 1828 IfBB->addSuccessor(MergeBB); 1829 IfBB->addSuccessor(CodeBBStart); 1830 1831 DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n"); 1832 // Ensure that the MergeBB is a succesor of the CodeEndBB. 1833 if (!CodeBBEnd->isSuccessor(MergeBB)) 1834 CodeBBEnd->addSuccessor(MergeBB); 1835 1836 DEBUG(dbgs() << "Moved MBB#" << CodeBBStart->getNumber() << " through MBB#" 1837 << CodeBBEnd->getNumber() << "\n"); 1838 1839 // If we have a single predecessor we can find a reasonable debug location 1840 MachineBasicBlock *SinglePred = 1841 CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr; 1842 const DebugLoc &DL = SinglePred 1843 ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator()) 1844 : DebugLoc(); 1845 1846 unsigned Reg = 1847 TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg, 1848 SelectBB->getNumber() /* CodeBBStart->getNumber() */); 1849 if (&(*(IfBB->getParent()->begin())) == IfBB) { 1850 TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg, 1851 CodeBBStart->getNumber()); 1852 } 1853 MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); 1854 ArrayRef<MachineOperand> Cond(RegOp); 1855 TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL); 1856 1857 return IfBB; 1858 } 1859 1860 void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled( 1861 SmallVector<MachineOperand, 1> Cond) { 1862 if (Cond.size() != 1) 1863 return; 1864 if (!Cond[0].isReg()) 1865 return; 1866 1867 unsigned CondReg = Cond[0].getReg(); 1868 for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) { 1869 (*UI).setIsKill(false); 1870 } 1871 } 1872 1873 void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, 1874 MachineBasicBlock *MergeBB, 1875 unsigned BBSelectReg) { 1876 MachineBasicBlock *TrueBB = nullptr; 1877 MachineBasicBlock *FalseBB = nullptr; 1878 SmallVector<MachineOperand, 1> Cond; 1879 MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB]; 1880 TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond); 1881 1882 const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator()); 1883 1884 if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) { 1885 // This is an exit block, hence no successors. We will assign the 1886 // bb select register to the entry block. 1887 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1888 BBSelectReg, 1889 CodeBB->getParent()->begin()->getNumber()); 1890 insertUnconditionalBranch(CodeBB, MergeBB, DL); 1891 return; 1892 } 1893 1894 if (FalseBB == nullptr && TrueBB == nullptr) { 1895 TrueBB = FallthroughBB; 1896 } else if (TrueBB != nullptr) { 1897 FalseBB = 1898 (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB; 1899 } 1900 1901 if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) { 1902 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1903 BBSelectReg, TrueBB->getNumber()); 1904 } else { 1905 const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg); 1906 unsigned TrueBBReg = MRI->createVirtualRegister(RegClass); 1907 unsigned FalseBBReg = MRI->createVirtualRegister(RegClass); 1908 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1909 TrueBBReg, TrueBB->getNumber()); 1910 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1911 FalseBBReg, FalseBB->getNumber()); 1912 ensureCondIsNotKilled(Cond); 1913 TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL, 1914 BBSelectReg, Cond, TrueBBReg, FalseBBReg); 1915 } 1916 1917 insertUnconditionalBranch(CodeBB, MergeBB, DL); 1918 } 1919 1920 MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) { 1921 if (MRI->def_begin(Reg) == MRI->def_end()) { 1922 DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1923 << " has NO defs\n"); 1924 } else if (!MRI->hasOneDef(Reg)) { 1925 DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1926 << " has multiple defs\n"); 1927 DEBUG(dbgs() << "DEFS BEGIN:\n"); 1928 for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { 1929 DEBUG(DI->getParent()->dump()); 1930 } 1931 DEBUG(dbgs() << "DEFS END\n"); 1932 } 1933 1934 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); 1935 return (*(MRI->def_begin(Reg))).getParent(); 1936 } 1937 1938 void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, 1939 MachineBasicBlock *CodeBB, 1940 MachineBasicBlock *MergeBB, 1941 LinearizedRegion *InnerRegion, 1942 unsigned DestReg, 1943 unsigned SourceReg) { 1944 // In this function we know we are part of a chain already, so we need 1945 // to add the registers to the existing chain, and rename the register 1946 // inside the region. 1947 bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); 1948 MachineInstr *DefInstr = getDefInstr(SourceReg); 1949 if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) { 1950 // Handle the case where the def is a PHI-def inside a basic 1951 // block, then we only need to do renaming. Special care needs to 1952 // be taken if the PHI-def is part of an existing chain, or if a 1953 // new one needs to be created. 1954 InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI); 1955 1956 // We collect all PHI Information, and if we are at the region entry, 1957 // all PHIs will be removed, and then re-introduced if needed. 1958 storePHILinearizationInfoDest(DestReg, *DefInstr); 1959 // We have picked up all the information we need now and can remove 1960 // the PHI 1961 PHIInfo.removeSource(DestReg, SourceReg, CodeBB); 1962 DefInstr->eraseFromParent(); 1963 } else { 1964 // If this is not a phi-def, or it is a phi-def but from a linearized region 1965 if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) { 1966 // If this is a single BB and the definition is in this block we 1967 // need to replace any uses outside the region. 1968 InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI); 1969 } 1970 const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg); 1971 unsigned NextDestReg = MRI->createVirtualRegister(RegClass); 1972 bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1; 1973 DEBUG(dbgs() << "Insert Chained PHI\n"); 1974 insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg, 1975 SourceReg, IsLastDef); 1976 1977 PHIInfo.removeSource(DestReg, SourceReg, CodeBB); 1978 if (IsLastDef) { 1979 const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator()); 1980 TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL, 1981 NextDestReg, 0); 1982 PHIInfo.deleteDef(DestReg); 1983 } else { 1984 PHIInfo.replaceDef(DestReg, NextDestReg); 1985 } 1986 } 1987 } 1988 1989 bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB, 1990 LinearizedRegion *InnerRegion, 1991 unsigned Register) { 1992 return getDefInstr(Register)->getParent() == MBB || 1993 InnerRegion->contains(getDefInstr(Register)->getParent()); 1994 } 1995 1996 void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, 1997 MachineBasicBlock *CodeBB, 1998 MachineBasicBlock *MergeBB, 1999 LinearizedRegion *InnerRegion, 2000 LinearizedRegion *LRegion) { 2001 DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts(); 2002 SmallVector<unsigned, 4> OldLiveOuts; 2003 bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); 2004 for (auto OLI : *LiveOuts) { 2005 OldLiveOuts.push_back(OLI); 2006 } 2007 2008 for (auto LI : OldLiveOuts) { 2009 DEBUG(dbgs() << "LiveOut: " << PrintReg(LI, TRI)); 2010 if (!containsDef(CodeBB, InnerRegion, LI) || 2011 (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) { 2012 // If the register simly lives through the CodeBB, we don't have 2013 // to rewrite anything since the register is not defined in this 2014 // part of the code. 2015 DEBUG(dbgs() << "- through"); 2016 continue; 2017 } 2018 DEBUG(dbgs() << "\n"); 2019 unsigned Reg = LI; 2020 if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) { 2021 // If the register is live out, we do want to create a phi, 2022 // unless it is from the Exit block, becasuse in that case there 2023 // is already a PHI, and no need to create a new one. 2024 2025 // If the register is just a live out def and not part of a phi 2026 // chain, we need to create a PHI node to handle the if region, 2027 // and replace all uses outside of the region with the new dest 2028 // register, unless it is the outgoing BB select register. We have 2029 // already creaed phi nodes for these. 2030 const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); 2031 unsigned PHIDestReg = MRI->createVirtualRegister(RegClass); 2032 unsigned IfSourceReg = MRI->createVirtualRegister(RegClass); 2033 // Create initializer, this value is never used, but is needed 2034 // to satisfy SSA. 2035 DEBUG(dbgs() << "Initializer for reg: " << PrintReg(Reg) << "\n"); 2036 TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(), 2037 IfSourceReg, 0); 2038 2039 InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI); 2040 DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n"); 2041 insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg, 2042 IfSourceReg, Reg, true); 2043 } 2044 } 2045 2046 // Handle the chained definitions in PHIInfo, checking if this basic block 2047 // is a source block for a definition. 2048 SmallVector<unsigned, 4> Sources; 2049 if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) { 2050 DEBUG(dbgs() << "Inserting PHI Live Out from BB#" << CodeBB->getNumber() 2051 << "\n"); 2052 for (auto SI : Sources) { 2053 unsigned DestReg; 2054 PHIInfo.findDest(SI, CodeBB, DestReg); 2055 insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI); 2056 } 2057 DEBUG(dbgs() << "Insertion done.\n"); 2058 } 2059 2060 DEBUG(PHIInfo.dump(MRI)); 2061 } 2062 2063 void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) { 2064 DEBUG(dbgs() << "Before PHI Prune\n"); 2065 DEBUG(PHIInfo.dump(MRI)); 2066 SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4> 2067 ElimiatedSources; 2068 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2069 ++DRI) { 2070 2071 unsigned DestReg = *DRI; 2072 auto SE = PHIInfo.sources_end(DestReg); 2073 2074 bool MBBContainsPHISource = false; 2075 // Check if there is a PHI source in this MBB 2076 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2077 unsigned SourceReg = (*SRI).first; 2078 MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); 2079 if (Def->getParent()->getParent() == MBB) { 2080 MBBContainsPHISource = true; 2081 } 2082 } 2083 2084 // If so, all other sources are useless since we know this block 2085 // is always executed when the region is executed. 2086 if (MBBContainsPHISource) { 2087 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2088 PHILinearize::PHISourceT Source = *SRI; 2089 unsigned SourceReg = Source.first; 2090 MachineBasicBlock *SourceMBB = Source.second; 2091 MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); 2092 if (Def->getParent()->getParent() != MBB) { 2093 ElimiatedSources.push_back( 2094 std::make_tuple(DestReg, SourceReg, SourceMBB)); 2095 } 2096 } 2097 } 2098 } 2099 2100 // Remove the PHI sources that are in the given MBB 2101 for (auto &SourceInfo : ElimiatedSources) { 2102 PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo), 2103 std::get<2>(SourceInfo)); 2104 } 2105 DEBUG(dbgs() << "After PHI Prune\n"); 2106 DEBUG(PHIInfo.dump(MRI)); 2107 } 2108 2109 void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion, 2110 unsigned DestReg) { 2111 MachineBasicBlock *Entry = CurrentRegion->getEntry(); 2112 MachineBasicBlock *Exit = CurrentRegion->getExit(); 2113 2114 DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() 2115 << " Pred: " << (*(Entry->pred_begin()))->getNumber() << "\n"); 2116 2117 int NumSources = 0; 2118 auto SE = PHIInfo.sources_end(DestReg); 2119 2120 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2121 NumSources++; 2122 } 2123 2124 if (NumSources == 1) { 2125 auto SRI = PHIInfo.sources_begin(DestReg); 2126 unsigned SourceReg = (*SRI).first; 2127 replaceRegisterWith(DestReg, SourceReg); 2128 } else { 2129 const DebugLoc &DL = Entry->findDebugLoc(Entry->begin()); 2130 MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL, 2131 TII->get(TargetOpcode::PHI), DestReg); 2132 DEBUG(dbgs() << "Entry PHI " << PrintReg(DestReg, TRI) << "<def> = PHI("); 2133 2134 unsigned CurrentBackedgeReg = 0; 2135 2136 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2137 unsigned SourceReg = (*SRI).first; 2138 2139 if (CurrentRegion->contains((*SRI).second)) { 2140 if (CurrentBackedgeReg == 0) { 2141 CurrentBackedgeReg = SourceReg; 2142 } else { 2143 MachineInstr *PHIDefInstr = getDefInstr(SourceReg); 2144 MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent(); 2145 const TargetRegisterClass *RegClass = 2146 MRI->getRegClass(CurrentBackedgeReg); 2147 unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass); 2148 MachineInstrBuilder BackedgePHI = 2149 BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL, 2150 TII->get(TargetOpcode::PHI), NewBackedgeReg); 2151 BackedgePHI.addReg(CurrentBackedgeReg); 2152 BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0)); 2153 BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1)); 2154 BackedgePHI.addMBB((*SRI).second); 2155 CurrentBackedgeReg = NewBackedgeReg; 2156 DEBUG(dbgs() << "Inserting backedge PHI: " 2157 << PrintReg(NewBackedgeReg, TRI) << "<def> = PHI(" 2158 << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" 2159 << getPHIPred(*PHIDefInstr, 0)->getNumber() << ", " 2160 << PrintReg(getPHISourceReg(*PHIDefInstr, 1), TRI) 2161 << ", BB#" << (*SRI).second->getNumber()); 2162 } 2163 } else { 2164 MIB.addReg(SourceReg); 2165 MIB.addMBB((*SRI).second); 2166 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 2167 << (*SRI).second->getNumber() << ", "); 2168 } 2169 } 2170 2171 // Add the final backedge register source to the entry phi 2172 if (CurrentBackedgeReg != 0) { 2173 MIB.addReg(CurrentBackedgeReg); 2174 MIB.addMBB(Exit); 2175 DEBUG(dbgs() << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" 2176 << Exit->getNumber() << ")\n"); 2177 } else { 2178 DEBUG(dbgs() << ")\n"); 2179 } 2180 } 2181 } 2182 2183 void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) { 2184 DEBUG(PHIInfo.dump(MRI)); 2185 2186 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2187 ++DRI) { 2188 2189 unsigned DestReg = *DRI; 2190 createEntryPHI(CurrentRegion, DestReg); 2191 } 2192 PHIInfo.clear(); 2193 } 2194 2195 void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register, 2196 unsigned NewRegister) { 2197 assert(Register != NewRegister && "Cannot replace a reg with itself"); 2198 2199 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), 2200 E = MRI->reg_end(); 2201 I != E;) { 2202 MachineOperand &O = *I; 2203 ++I; 2204 if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { 2205 DEBUG(dbgs() << "Trying to substitute physical register: " 2206 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 2207 << "\n"); 2208 llvm_unreachable("Cannot substitute physical registers"); 2209 // We don't handle physical registers, but if we need to 2210 // in the future This is how we do it: 2211 // O.substPhysReg(NewRegister, *TRI); 2212 } else { 2213 DEBUG(dbgs() << "Replacing register: " 2214 << PrintReg(Register, MRI->getTargetRegisterInfo()) 2215 << " with " 2216 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 2217 << "\n"); 2218 O.setReg(NewRegister); 2219 } 2220 } 2221 PHIInfo.deleteDef(Register); 2222 2223 getRegionMRT()->replaceLiveOutReg(Register, NewRegister); 2224 2225 DEBUG(PHIInfo.dump(MRI)); 2226 } 2227 2228 void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) { 2229 DEBUG(dbgs() << "Resolve PHI Infos\n"); 2230 DEBUG(PHIInfo.dump(MRI)); 2231 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2232 ++DRI) { 2233 unsigned DestReg = *DRI; 2234 DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) << "\n"); 2235 auto SRI = PHIInfo.sources_begin(DestReg); 2236 unsigned SourceReg = (*SRI).first; 2237 DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) 2238 << " SourceReg: " << PrintReg(SourceReg, TRI) << "\n"); 2239 2240 assert(PHIInfo.sources_end(DestReg) == ++SRI && 2241 "More than one phi source in entry node"); 2242 replaceRegisterWith(DestReg, SourceReg); 2243 } 2244 } 2245 2246 static bool isFunctionEntryBlock(MachineBasicBlock *MBB) { 2247 return ((&(*(MBB->getParent()->begin()))) == MBB); 2248 } 2249 2250 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( 2251 MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB, 2252 LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn, 2253 unsigned BBSelectRegOut) { 2254 if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) { 2255 // Handle non-loop function entry block. 2256 // We need to allow loops to the entry block and then 2257 rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); 2258 resolvePHIInfos(CodeBB); 2259 removeExternalCFGSuccessors(CodeBB); 2260 CodeBB->addSuccessor(MergeBB); 2261 CurrentRegion->addMBB(CodeBB); 2262 return nullptr; 2263 } 2264 if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) { 2265 // Handle non-loop region entry block. 2266 MachineFunction *MF = MergeBB->getParent(); 2267 auto MergeIter = MergeBB->getIterator(); 2268 auto CodeBBStartIter = CodeBB->getIterator(); 2269 auto CodeBBEndIter = ++(CodeBB->getIterator()); 2270 if (CodeBBEndIter != MergeIter) { 2271 MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter); 2272 } 2273 rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); 2274 prunePHIInfo(CodeBB); 2275 createEntryPHIs(CurrentRegion); 2276 removeExternalCFGSuccessors(CodeBB); 2277 CodeBB->addSuccessor(MergeBB); 2278 CurrentRegion->addMBB(CodeBB); 2279 return nullptr; 2280 } else { 2281 // Handle internal block. 2282 const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn); 2283 unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass); 2284 rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg); 2285 bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB; 2286 MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB, 2287 BBSelectRegIn, IsRegionEntryBB); 2288 CurrentRegion->addMBB(IfBB); 2289 // If this is the entry block we need to make the If block the new 2290 // linearized region entry. 2291 if (IsRegionEntryBB) { 2292 CurrentRegion->setEntry(IfBB); 2293 2294 if (CurrentRegion->getHasLoop()) { 2295 MachineBasicBlock *RegionExit = CurrentRegion->getExit(); 2296 MachineBasicBlock *ETrueBB = nullptr; 2297 MachineBasicBlock *EFalseBB = nullptr; 2298 SmallVector<MachineOperand, 1> ECond; 2299 2300 const DebugLoc &DL = DebugLoc(); 2301 TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); 2302 TII->removeBranch(*RegionExit); 2303 2304 // We need to create a backedge if there is a loop 2305 unsigned Reg = TII->insertNE( 2306 RegionExit, RegionExit->instr_end(), DL, 2307 CurrentRegion->getRegionMRT()->getInnerOutputRegister(), 2308 CurrentRegion->getRegionMRT()->getEntry()->getNumber()); 2309 MachineOperand RegOp = 2310 MachineOperand::CreateReg(Reg, false, false, true); 2311 ArrayRef<MachineOperand> Cond(RegOp); 2312 DEBUG(dbgs() << "RegionExitReg: "); 2313 DEBUG(Cond[0].print(dbgs(), TRI)); 2314 DEBUG(dbgs() << "\n"); 2315 TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, 2316 Cond, DebugLoc()); 2317 RegionExit->addSuccessor(CurrentRegion->getEntry()); 2318 } 2319 } 2320 CurrentRegion->addMBB(CodeBB); 2321 LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo); 2322 2323 InnerRegion.setParent(CurrentRegion); 2324 DEBUG(dbgs() << "Insert BB Select PHI (BB)\n"); 2325 insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn, 2326 CodeBBSelectReg); 2327 InnerRegion.addMBB(MergeBB); 2328 2329 DEBUG(InnerRegion.print(dbgs(), TRI)); 2330 rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion); 2331 extractKilledPHIs(CodeBB); 2332 if (IsRegionEntryBB) { 2333 createEntryPHIs(CurrentRegion); 2334 } 2335 return IfBB; 2336 } 2337 } 2338 2339 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( 2340 MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion, 2341 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, 2342 unsigned BBSelectRegIn, unsigned BBSelectRegOut) { 2343 unsigned CodeBBSelectReg = 2344 InnerRegion->getRegionMRT()->getInnerOutputRegister(); 2345 MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry(); 2346 MachineBasicBlock *CodeExitBB = InnerRegion->getExit(); 2347 MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB, 2348 SelectBB, BBSelectRegIn, true); 2349 CurrentRegion->addMBB(IfBB); 2350 bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry(); 2351 if (isEntry) { 2352 2353 if (CurrentRegion->getHasLoop()) { 2354 MachineBasicBlock *RegionExit = CurrentRegion->getExit(); 2355 MachineBasicBlock *ETrueBB = nullptr; 2356 MachineBasicBlock *EFalseBB = nullptr; 2357 SmallVector<MachineOperand, 1> ECond; 2358 2359 const DebugLoc &DL = DebugLoc(); 2360 TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); 2361 TII->removeBranch(*RegionExit); 2362 2363 // We need to create a backedge if there is a loop 2364 unsigned Reg = 2365 TII->insertNE(RegionExit, RegionExit->instr_end(), DL, 2366 CurrentRegion->getRegionMRT()->getInnerOutputRegister(), 2367 CurrentRegion->getRegionMRT()->getEntry()->getNumber()); 2368 MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); 2369 ArrayRef<MachineOperand> Cond(RegOp); 2370 DEBUG(dbgs() << "RegionExitReg: "); 2371 DEBUG(Cond[0].print(dbgs(), TRI)); 2372 DEBUG(dbgs() << "\n"); 2373 TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, 2374 Cond, DebugLoc()); 2375 RegionExit->addSuccessor(IfBB); 2376 } 2377 } 2378 CurrentRegion->addMBBs(InnerRegion); 2379 DEBUG(dbgs() << "Insert BB Select PHI (region)\n"); 2380 insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn, 2381 CodeBBSelectReg); 2382 2383 rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion, 2384 CurrentRegion); 2385 2386 rewriteRegionEntryPHIs(InnerRegion, IfBB); 2387 2388 if (isEntry) { 2389 CurrentRegion->setEntry(IfBB); 2390 } 2391 2392 if (isEntry) { 2393 createEntryPHIs(CurrentRegion); 2394 } 2395 2396 return IfBB; 2397 } 2398 2399 void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, 2400 MachineBasicBlock *Entry, 2401 MachineBasicBlock *EntrySucc, 2402 LinearizedRegion *LRegion) { 2403 SmallVector<unsigned, 2> PHIRegionIndices; 2404 getPHIRegionIndices(LRegion, PHI, PHIRegionIndices); 2405 2406 assert(PHIRegionIndices.size() == 1); 2407 2408 unsigned RegionIndex = PHIRegionIndices[0]; 2409 unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex); 2410 MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex); 2411 unsigned PHIDest = getPHIDestReg(PHI); 2412 unsigned PHISource = PHIDest; 2413 unsigned ReplaceReg; 2414 2415 if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) { 2416 PHISource = ReplaceReg; 2417 } 2418 2419 const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest); 2420 unsigned NewDestReg = MRI->createVirtualRegister(RegClass); 2421 LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI); 2422 MachineInstrBuilder MIB = 2423 BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), 2424 TII->get(TargetOpcode::PHI), NewDestReg); 2425 DEBUG(dbgs() << "Split Entry PHI " << PrintReg(NewDestReg, TRI) 2426 << "<def> = PHI("); 2427 MIB.addReg(PHISource); 2428 MIB.addMBB(Entry); 2429 DEBUG(dbgs() << PrintReg(PHISource, TRI) << ", BB#" << Entry->getNumber()); 2430 MIB.addReg(RegionSourceReg); 2431 MIB.addMBB(RegionSourceMBB); 2432 DEBUG(dbgs() << " ," << PrintReg(RegionSourceReg, TRI) << ", BB#" 2433 << RegionSourceMBB->getNumber() << ")\n"); 2434 } 2435 2436 void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry, 2437 MachineBasicBlock *EntrySucc, 2438 LinearizedRegion *LRegion) { 2439 SmallVector<MachineInstr *, 2> PHIs; 2440 collectPHIs(Entry, PHIs); 2441 2442 for (auto PHII : PHIs) { 2443 splitLoopPHI(*PHII, Entry, EntrySucc, LRegion); 2444 } 2445 } 2446 2447 // Split the exit block so that we can insert a end control flow 2448 MachineBasicBlock * 2449 AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) { 2450 auto MRTRegion = LRegion->getRegionMRT(); 2451 auto Exit = LRegion->getExit(); 2452 auto MF = Exit->getParent(); 2453 auto Succ = MRTRegion->getSucc(); 2454 2455 auto NewExit = MF->CreateMachineBasicBlock(); 2456 auto AfterExitIter = Exit->getIterator(); 2457 AfterExitIter++; 2458 MF->insert(AfterExitIter, NewExit); 2459 Exit->removeSuccessor(Succ); 2460 Exit->addSuccessor(NewExit); 2461 NewExit->addSuccessor(Succ); 2462 insertUnconditionalBranch(NewExit, Succ); 2463 LRegion->addMBB(NewExit); 2464 LRegion->setExit(NewExit); 2465 2466 DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() << "\n"); 2467 2468 // Replace any PHI Predecessors in the successor with NewExit 2469 for (auto &II : *Succ) { 2470 MachineInstr &Instr = II; 2471 2472 // If we are past the PHI instructions we are done 2473 if (!Instr.isPHI()) 2474 break; 2475 2476 int numPreds = getPHINumInputs(Instr); 2477 for (int i = 0; i < numPreds; ++i) { 2478 auto Pred = getPHIPred(Instr, i); 2479 if (Pred == Exit) { 2480 setPhiPred(Instr, i, NewExit); 2481 } 2482 } 2483 } 2484 2485 return NewExit; 2486 } 2487 2488 2489 static MachineBasicBlock *split(MachineBasicBlock::iterator I) { 2490 // Create the fall-through block. 2491 MachineBasicBlock *MBB = (*I).getParent(); 2492 MachineFunction *MF = MBB->getParent(); 2493 MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock(); 2494 auto MBBIter = ++(MBB->getIterator()); 2495 MF->insert(MBBIter, SuccMBB); 2496 SuccMBB->transferSuccessorsAndUpdatePHIs(MBB); 2497 MBB->addSuccessor(SuccMBB); 2498 2499 // Splice the code over. 2500 SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end()); 2501 2502 return SuccMBB; 2503 } 2504 2505 // Split the entry block separating PHI-nodes and the rest of the code 2506 // This is needed to insert an initializer for the bb select register 2507 // inloop regions. 2508 2509 MachineBasicBlock * 2510 AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) { 2511 MachineBasicBlock *Entry = LRegion->getEntry(); 2512 MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI()); 2513 MachineBasicBlock *Exit = LRegion->getExit(); 2514 2515 DEBUG(dbgs() << "Split BB#" << Entry->getNumber() << " to BB#" 2516 << Entry->getNumber() << " -> BB#" << EntrySucc->getNumber() 2517 << "\n"); 2518 LRegion->addMBB(EntrySucc); 2519 2520 // Make the backedge go to Entry Succ 2521 if (Exit->isSuccessor(Entry)) { 2522 Exit->removeSuccessor(Entry); 2523 } 2524 Exit->addSuccessor(EntrySucc); 2525 MachineInstr &Branch = *(Exit->instr_rbegin()); 2526 for (auto &UI : Branch.uses()) { 2527 if (UI.isMBB() && UI.getMBB() == Entry) { 2528 UI.setMBB(EntrySucc); 2529 } 2530 } 2531 2532 splitLoopPHIs(Entry, EntrySucc, LRegion); 2533 2534 return EntrySucc; 2535 } 2536 2537 LinearizedRegion * 2538 AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) { 2539 LinearizedRegion *LRegion = Region->getLinearizedRegion(); 2540 LRegion->initLiveOut(Region, MRI, TRI, PHIInfo); 2541 LRegion->setEntry(Region->getEntry()); 2542 return LRegion; 2543 } 2544 2545 static void removeOldExitPreds(RegionMRT *Region) { 2546 MachineBasicBlock *Exit = Region->getSucc(); 2547 if (Exit == nullptr) { 2548 return; 2549 } 2550 for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(), 2551 E = Exit->pred_end(); 2552 PI != E; ++PI) { 2553 if (Region->contains(*PI)) { 2554 (*PI)->removeSuccessor(Exit); 2555 } 2556 } 2557 } 2558 2559 static bool mbbHasBackEdge(MachineBasicBlock *MBB, 2560 SmallPtrSet<MachineBasicBlock *, 8> &MBBs) { 2561 for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { 2562 if (MBBs.count(*SI) != 0) { 2563 return true; 2564 } 2565 } 2566 return false; 2567 } 2568 2569 static bool containsNewBackedge(MRT *Tree, 2570 SmallPtrSet<MachineBasicBlock *, 8> &MBBs) { 2571 // Need to traverse this in reverse since it is in post order. 2572 if (Tree == nullptr) 2573 return false; 2574 2575 if (Tree->isMBB()) { 2576 MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB(); 2577 MBBs.insert(MBB); 2578 if (mbbHasBackEdge(MBB, MBBs)) { 2579 return true; 2580 } 2581 } else { 2582 RegionMRT *Region = Tree->getRegionMRT(); 2583 SetVector<MRT *> *Children = Region->getChildren(); 2584 for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) { 2585 if (containsNewBackedge(*CI, MBBs)) 2586 return true; 2587 } 2588 } 2589 return false; 2590 } 2591 2592 static bool containsNewBackedge(RegionMRT *Region) { 2593 SmallPtrSet<MachineBasicBlock *, 8> MBBs; 2594 return containsNewBackedge(Region, MBBs); 2595 } 2596 2597 bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { 2598 auto *LRegion = initLinearizedRegion(Region); 2599 LRegion->setHasLoop(containsNewBackedge(Region)); 2600 MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region); 2601 MachineBasicBlock *CurrentMerge = LastMerge; 2602 LRegion->addMBB(LastMerge); 2603 LRegion->setExit(LastMerge); 2604 2605 rewriteRegionExitPHIs(Region, LastMerge, LRegion); 2606 removeOldExitPreds(Region); 2607 2608 DEBUG(PHIInfo.dump(MRI)); 2609 2610 SetVector<MRT *> *Children = Region->getChildren(); 2611 DEBUG(dbgs() << "===========If Region Start===============\n"); 2612 if (LRegion->getHasLoop()) { 2613 DEBUG(dbgs() << "Has Backedge: Yes\n"); 2614 } else { 2615 DEBUG(dbgs() << "Has Backedge: No\n"); 2616 } 2617 2618 unsigned BBSelectRegIn; 2619 unsigned BBSelectRegOut; 2620 for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) { 2621 DEBUG(dbgs() << "CurrentRegion: \n"); 2622 DEBUG(LRegion->print(dbgs(), TRI)); 2623 2624 auto CNI = CI; 2625 ++CNI; 2626 2627 MRT *Child = (*CI); 2628 2629 if (Child->isRegion()) { 2630 2631 LinearizedRegion *InnerLRegion = 2632 Child->getRegionMRT()->getLinearizedRegion(); 2633 // We found the block is the exit of an inner region, we need 2634 // to put it in the current linearized region. 2635 2636 DEBUG(dbgs() << "Linearizing region: "); 2637 DEBUG(InnerLRegion->print(dbgs(), TRI)); 2638 DEBUG(dbgs() << "\n"); 2639 2640 MachineBasicBlock *InnerEntry = InnerLRegion->getEntry(); 2641 if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) { 2642 // Entry has already been linearized, no need to do this region. 2643 unsigned OuterSelect = InnerLRegion->getBBSelectRegOut(); 2644 unsigned InnerSelectReg = 2645 InnerLRegion->getRegionMRT()->getInnerOutputRegister(); 2646 replaceRegisterWith(InnerSelectReg, OuterSelect), 2647 resolvePHIInfos(InnerEntry); 2648 if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge)) 2649 InnerLRegion->getExit()->addSuccessor(CurrentMerge); 2650 continue; 2651 } 2652 2653 BBSelectRegOut = Child->getBBSelectRegOut(); 2654 BBSelectRegIn = Child->getBBSelectRegIn(); 2655 2656 DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) 2657 << "\n"); 2658 DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) 2659 << "\n"); 2660 2661 MachineBasicBlock *IfEnd = CurrentMerge; 2662 CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion, 2663 Child->getRegionMRT()->getEntry(), 2664 BBSelectRegIn, BBSelectRegOut); 2665 TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); 2666 } else { 2667 MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB(); 2668 DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n"); 2669 2670 if (MBB == getSingleExitNode(*(MBB->getParent()))) { 2671 // If this is the exit block then we need to skip to the next. 2672 // The "in" register will be transferred to "out" in the next 2673 // iteration. 2674 continue; 2675 } 2676 2677 BBSelectRegOut = Child->getBBSelectRegOut(); 2678 BBSelectRegIn = Child->getBBSelectRegIn(); 2679 2680 DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) 2681 << "\n"); 2682 DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) 2683 << "\n"); 2684 2685 MachineBasicBlock *IfEnd = CurrentMerge; 2686 // This is a basic block that is not part of an inner region, we 2687 // need to put it in the current linearized region. 2688 CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn, 2689 BBSelectRegOut); 2690 if (CurrentMerge) { 2691 TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); 2692 } 2693 2694 DEBUG(PHIInfo.dump(MRI)); 2695 } 2696 } 2697 2698 LRegion->removeFalseRegisterKills(MRI); 2699 2700 if (LRegion->getHasLoop()) { 2701 MachineBasicBlock *NewSucc = splitEntry(LRegion); 2702 if (isFunctionEntryBlock(LRegion->getEntry())) { 2703 resolvePHIInfos(LRegion->getEntry()); 2704 } 2705 const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI()); 2706 unsigned InReg = LRegion->getBBSelectRegIn(); 2707 unsigned InnerSelectReg = 2708 MRI->createVirtualRegister(MRI->getRegClass(InReg)); 2709 unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); 2710 TII->materializeImmediate(*(LRegion->getEntry()), 2711 LRegion->getEntry()->getFirstTerminator(), DL, 2712 NewInReg, Region->getEntry()->getNumber()); 2713 // Need to be careful about updating the registers inside the region. 2714 LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI); 2715 DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n"); 2716 insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc, 2717 InnerSelectReg, NewInReg, 2718 LRegion->getRegionMRT()->getInnerOutputRegister()); 2719 splitExit(LRegion); 2720 TII->convertNonUniformLoopRegion(NewSucc, LastMerge); 2721 } 2722 2723 if (Region->isRoot()) { 2724 TII->insertReturn(*LastMerge); 2725 } 2726 2727 DEBUG(Region->getEntry()->getParent()->dump()); 2728 DEBUG(LRegion->print(dbgs(), TRI)); 2729 DEBUG(PHIInfo.dump(MRI)); 2730 2731 DEBUG(dbgs() << "===========If Region End===============\n"); 2732 2733 Region->setLinearizedRegion(LRegion); 2734 return true; 2735 } 2736 2737 bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) { 2738 if (false && regionIsSimpleIf(Region)) { 2739 transformSimpleIfRegion(Region); 2740 return true; 2741 } else if (regionIsSequence(Region)) { 2742 fixupRegionExits(Region); 2743 return false; 2744 } else { 2745 structurizeComplexRegion(Region); 2746 } 2747 return false; 2748 } 2749 2750 static int structurize_once = 0; 2751 2752 bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region, 2753 bool isTopRegion) { 2754 bool Changed = false; 2755 2756 auto Children = Region->getChildren(); 2757 for (auto CI : *Children) { 2758 if (CI->isRegion()) { 2759 Changed |= structurizeRegions(CI->getRegionMRT(), false); 2760 } 2761 } 2762 2763 if (structurize_once < 2 || true) { 2764 Changed |= structurizeRegion(Region); 2765 structurize_once++; 2766 } 2767 return Changed; 2768 } 2769 2770 void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) { 2771 DEBUG(dbgs() << "Fallthrough Map:\n"); 2772 for (auto &MBBI : MF) { 2773 MachineBasicBlock *MBB = MBBI.getFallThrough(); 2774 if (MBB != nullptr) { 2775 DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> " 2776 << MBB->getNumber() << "\n"); 2777 } 2778 FallthroughMap[&MBBI] = MBB; 2779 } 2780 } 2781 2782 void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region, 2783 unsigned SelectOut) { 2784 LinearizedRegion *LRegion = new LinearizedRegion(); 2785 if (SelectOut) { 2786 LRegion->addLiveOut(SelectOut); 2787 DEBUG(dbgs() << "Add LiveOut (BBSelect): " << PrintReg(SelectOut, TRI) 2788 << "\n"); 2789 } 2790 LRegion->setRegionMRT(Region); 2791 Region->setLinearizedRegion(LRegion); 2792 LRegion->setParent(Region->getParent() 2793 ? Region->getParent()->getLinearizedRegion() 2794 : nullptr); 2795 } 2796 2797 unsigned 2798 AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut, 2799 MachineRegisterInfo *MRI, 2800 const SIInstrInfo *TII) { 2801 if (MRT->isRegion()) { 2802 RegionMRT *Region = MRT->getRegionMRT(); 2803 Region->setBBSelectRegOut(SelectOut); 2804 unsigned InnerSelectOut = createBBSelectReg(TII, MRI); 2805 2806 // Fixme: Move linearization creation to the original spot 2807 createLinearizedRegion(Region, SelectOut); 2808 2809 for (auto CI = Region->getChildren()->begin(), 2810 CE = Region->getChildren()->end(); 2811 CI != CE; ++CI) { 2812 InnerSelectOut = 2813 initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII); 2814 } 2815 MRT->setBBSelectRegIn(InnerSelectOut); 2816 return InnerSelectOut; 2817 } else { 2818 MRT->setBBSelectRegOut(SelectOut); 2819 unsigned NewSelectIn = createBBSelectReg(TII, MRI); 2820 MRT->setBBSelectRegIn(NewSelectIn); 2821 return NewSelectIn; 2822 } 2823 } 2824 2825 static void checkRegOnlyPHIInputs(MachineFunction &MF) { 2826 for (auto &MBBI : MF) { 2827 for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(), 2828 E = MBBI.instr_end(); 2829 I != E; ++I) { 2830 MachineInstr &Instr = *I; 2831 if (Instr.isPHI()) { 2832 int numPreds = getPHINumInputs(Instr); 2833 for (int i = 0; i < numPreds; ++i) { 2834 assert(Instr.getOperand(i * 2 + 1).isReg() && 2835 "PHI Operand not a register"); 2836 } 2837 } 2838 } 2839 } 2840 } 2841 2842 2843 INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", 2844 "AMDGPU Machine CFG Structurizer", false, false) 2845 INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass) 2846 INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", 2847 "AMDGPU Machine CFG Structurizer", false, false) 2848 2849 char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID; 2850 2851 2852 bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { 2853 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 2854 const SIInstrInfo *TII = ST.getInstrInfo(); 2855 TRI = ST.getRegisterInfo(); 2856 MRI = &(MF.getRegInfo()); 2857 initFallthroughMap(MF); 2858 2859 checkRegOnlyPHIInputs(MF); 2860 DEBUG(dbgs() << "----STRUCTURIZER START----\n"); 2861 DEBUG(MF.dump()); 2862 2863 Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo()); 2864 DEBUG(Regions->dump()); 2865 2866 RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI); 2867 setRegionMRT(RTree); 2868 initializeSelectRegisters(RTree, 0, MRI, TII); 2869 DEBUG(RTree->dump(TRI)); 2870 bool result = structurizeRegions(RTree, true); 2871 delete RTree; 2872 DEBUG(dbgs() << "----STRUCTURIZER END----\n"); 2873 initFallthroughMap(MF); 2874 return result; 2875 } 2876 2877 FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() { 2878 return new AMDGPUMachineCFGStructurizer(); 2879 } 2880