1 //===-- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the machine instruction level CFG structurizer pass. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "AMDGPU.h" 15 #include "SIInstrInfo.h" 16 #include "AMDGPUSubtarget.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/PostOrderIterator.h" 19 #include "llvm/ADT/SetVector.h" 20 #include "llvm/ADT/SmallPtrSet.h" 21 #include "llvm/ADT/SmallVector.h" 22 #include "llvm/Analysis/CFG.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineFunctionPass.h" 25 #include "llvm/CodeGen/MachineInstr.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineRegionInfo.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/Passes.h" 30 #include "llvm/IR/DebugLoc.h" 31 #include "llvm/Support/Debug.h" 32 #include "llvm/Target/TargetInstrInfo.h" 33 #include "llvm/Target/TargetLowering.h" 34 #include "llvm/Target/TargetSubtargetInfo.h" 35 #include <tuple> 36 using namespace llvm; 37 38 #define DEBUG_TYPE "amdgpucfgstructurizer" 39 40 namespace { 41 class PHILinearizeDestIterator; 42 43 class PHILinearize { 44 friend class PHILinearizeDestIterator; 45 46 public: 47 typedef std::pair<unsigned, MachineBasicBlock *> PHISourceT; 48 49 private: 50 typedef DenseSet<PHISourceT> PHISourcesT; 51 typedef struct { 52 unsigned DestReg; 53 DebugLoc DL; 54 PHISourcesT Sources; 55 } PHIInfoElementT; 56 typedef SmallPtrSet<PHIInfoElementT *, 2> PHIInfoT; 57 PHIInfoT PHIInfo; 58 59 static unsigned phiInfoElementGetDest(PHIInfoElementT *Info); 60 static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef); 61 static DebugLoc phiInfoElementGetDebugLoc(PHIInfoElementT *Info); 62 static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info); 63 static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg, 64 MachineBasicBlock *SourceMBB); 65 static void phiInfoElementRemoveSource(PHIInfoElementT *Info, 66 unsigned SourceReg, 67 MachineBasicBlock *SourceMBB); 68 PHIInfoElementT *findPHIInfoElement(unsigned DestReg); 69 PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg, 70 MachineBasicBlock *SourceMBB); 71 72 public: 73 bool findSourcesFromMBB(MachineBasicBlock *SourceMBB, 74 SmallVector<unsigned, 4> &Sources); 75 void addDest(unsigned DestReg, const DebugLoc &DL); 76 void replaceDef(unsigned OldDestReg, unsigned NewDestReg); 77 void deleteDef(unsigned DestReg); 78 DebugLoc getDebugLoc(unsigned DestReg); 79 void addSource(unsigned DestReg, unsigned SourceReg, 80 MachineBasicBlock *SourceMBB); 81 void removeSource(unsigned DestReg, unsigned SourceReg, 82 MachineBasicBlock *SourceMBB = nullptr); 83 bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, 84 unsigned &DestReg); 85 bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr); 86 unsigned getNumSources(unsigned DestReg); 87 void dump(MachineRegisterInfo *MRI); 88 void clear(); 89 90 typedef PHISourcesT::iterator source_iterator; 91 typedef PHILinearizeDestIterator dest_iterator; 92 93 dest_iterator dests_begin(); 94 dest_iterator dests_end(); 95 96 source_iterator sources_begin(unsigned Reg); 97 source_iterator sources_end(unsigned Reg); 98 }; 99 100 class PHILinearizeDestIterator { 101 private: 102 PHILinearize::PHIInfoT::iterator Iter; 103 104 public: 105 unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); } 106 PHILinearizeDestIterator &operator++() { 107 ++Iter; 108 return *this; 109 } 110 bool operator==(const PHILinearizeDestIterator &I) const { 111 return I.Iter == Iter; 112 } 113 bool operator!=(const PHILinearizeDestIterator &I) const { 114 return I.Iter != Iter; 115 } 116 117 PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {} 118 }; 119 120 unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) { 121 return Info->DestReg; 122 } 123 124 void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info, 125 unsigned NewDef) { 126 Info->DestReg = NewDef; 127 } 128 129 DebugLoc PHILinearize::phiInfoElementGetDebugLoc(PHIInfoElementT *Info) { 130 return Info->DL; 131 } 132 133 PHILinearize::PHISourcesT & 134 PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) { 135 return Info->Sources; 136 } 137 138 void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info, 139 unsigned SourceReg, 140 MachineBasicBlock *SourceMBB) { 141 // Assertion ensures we don't use the same SourceMBB for the 142 // sources, because we cannot have different registers with 143 // identical predecessors, but we can have the same register for 144 // multiple predecessors. 145 for (auto SI : phiInfoElementGetSources(Info)) { 146 assert((SI.second != SourceMBB || SourceReg == SI.first)); 147 } 148 149 phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB)); 150 } 151 152 void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info, 153 unsigned SourceReg, 154 MachineBasicBlock *SourceMBB) { 155 auto &Sources = phiInfoElementGetSources(Info); 156 SmallVector<PHISourceT, 4> ElimiatedSources; 157 for (auto SI : Sources) { 158 if (SI.first == SourceReg && 159 (SI.second == nullptr || SI.second == SourceMBB)) { 160 ElimiatedSources.push_back(PHISourceT(SI.first, SI.second)); 161 } 162 } 163 164 for (auto &Source : ElimiatedSources) { 165 Sources.erase(Source); 166 } 167 } 168 169 PHILinearize::PHIInfoElementT * 170 PHILinearize::findPHIInfoElement(unsigned DestReg) { 171 for (auto I : PHIInfo) { 172 if (phiInfoElementGetDest(I) == DestReg) { 173 return I; 174 } 175 } 176 return nullptr; 177 } 178 179 PHILinearize::PHIInfoElementT * 180 PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg, 181 MachineBasicBlock *SourceMBB) { 182 for (auto I : PHIInfo) { 183 for (auto SI : phiInfoElementGetSources(I)) { 184 if (SI.first == SourceReg && 185 (SI.second == nullptr || SI.second == SourceMBB)) { 186 return I; 187 } 188 } 189 } 190 return nullptr; 191 } 192 193 bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB, 194 SmallVector<unsigned, 4> &Sources) { 195 bool FoundSource = false; 196 for (auto I : PHIInfo) { 197 for (auto SI : phiInfoElementGetSources(I)) { 198 if (SI.second == SourceMBB) { 199 FoundSource = true; 200 Sources.push_back(SI.first); 201 } 202 } 203 } 204 return FoundSource; 205 } 206 207 void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) { 208 assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists"); 209 PHISourcesT EmptySet; 210 PHIInfoElementT *NewElement = new PHIInfoElementT(); 211 NewElement->DestReg = DestReg; 212 NewElement->DL = DL; 213 NewElement->Sources = EmptySet; 214 PHIInfo.insert(NewElement); 215 } 216 217 void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) { 218 phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg); 219 } 220 221 void PHILinearize::deleteDef(unsigned DestReg) { 222 PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg); 223 PHIInfo.erase(InfoElement); 224 delete InfoElement; 225 } 226 227 DebugLoc PHILinearize::getDebugLoc(unsigned DestReg) { 228 return phiInfoElementGetDebugLoc(findPHIInfoElement(DestReg)); 229 } 230 231 void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg, 232 MachineBasicBlock *SourceMBB) { 233 phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); 234 } 235 236 void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg, 237 MachineBasicBlock *SourceMBB) { 238 phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); 239 } 240 241 bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, 242 unsigned &DestReg) { 243 PHIInfoElementT *InfoElement = 244 findPHIInfoElementFromSource(SourceReg, SourceMBB); 245 if (InfoElement != nullptr) { 246 DestReg = phiInfoElementGetDest(InfoElement); 247 return true; 248 } 249 return false; 250 } 251 252 bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) { 253 unsigned DestReg; 254 return findDest(Reg, SourceMBB, DestReg); 255 } 256 257 unsigned PHILinearize::getNumSources(unsigned DestReg) { 258 return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size(); 259 } 260 261 void PHILinearize::dump(MachineRegisterInfo *MRI) { 262 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); 263 DEBUG(dbgs() << "=PHIInfo Start=\n"); 264 for (auto PII : this->PHIInfo) { 265 PHIInfoElementT &Element = *PII; 266 DEBUG(dbgs() << "Dest: " << PrintReg(Element.DestReg, TRI) 267 << " Sources: {"); 268 for (auto &SI : Element.Sources) { 269 DEBUG(dbgs() << PrintReg(SI.first, TRI) << "(BB#" 270 << SI.second->getNumber() << "),"); 271 } 272 DEBUG(dbgs() << "}\n"); 273 } 274 DEBUG(dbgs() << "=PHIInfo End=\n"); 275 } 276 277 void PHILinearize::clear() { PHIInfo = PHIInfoT(); } 278 279 PHILinearize::dest_iterator PHILinearize::dests_begin() { 280 return PHILinearizeDestIterator(PHIInfo.begin()); 281 } 282 283 PHILinearize::dest_iterator PHILinearize::dests_end() { 284 return PHILinearizeDestIterator(PHIInfo.end()); 285 } 286 287 PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) { 288 auto InfoElement = findPHIInfoElement(Reg); 289 return phiInfoElementGetSources(InfoElement).begin(); 290 } 291 PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) { 292 auto InfoElement = findPHIInfoElement(Reg); 293 return phiInfoElementGetSources(InfoElement).end(); 294 } 295 296 class RegionMRT; 297 class MBBMRT; 298 299 static unsigned getPHINumInputs(MachineInstr &PHI) { 300 assert(PHI.isPHI()); 301 return (PHI.getNumOperands() - 1) / 2; 302 } 303 304 static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) { 305 assert(PHI.isPHI()); 306 return PHI.getOperand(Index * 2 + 2).getMBB(); 307 } 308 309 static void setPhiPred(MachineInstr &PHI, unsigned Index, 310 MachineBasicBlock *NewPred) { 311 PHI.getOperand(Index * 2 + 2).setMBB(NewPred); 312 } 313 314 static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) { 315 assert(PHI.isPHI()); 316 return PHI.getOperand(Index * 2 + 1).getReg(); 317 } 318 319 static unsigned getPHIDestReg(MachineInstr &PHI) { 320 assert(PHI.isPHI()); 321 return PHI.getOperand(0).getReg(); 322 } 323 324 class LinearizedRegion { 325 protected: 326 MachineBasicBlock *Entry; 327 // The exit block is part of the region, and is the last 328 // merge block before exiting the region. 329 MachineBasicBlock *Exit; 330 DenseSet<unsigned> LiveOuts; 331 SmallPtrSet<MachineBasicBlock *, 1> MBBs; 332 bool HasLoop; 333 LinearizedRegion *Parent; 334 RegionMRT *RMRT; 335 336 void storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, 337 MachineInstr *DefInstr, const MachineRegisterInfo *MRI, 338 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 339 340 void storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, 341 MachineInstr *DefInstr, 342 const MachineRegisterInfo *MRI, 343 const TargetRegisterInfo *TRI, 344 PHILinearize &PHIInfo); 345 346 void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 347 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, 348 RegionMRT *TopRegion); 349 350 void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 351 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 352 353 void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI, 354 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, 355 RegionMRT *TopRegion = nullptr); 356 357 public: 358 void setRegionMRT(RegionMRT *Region) { RMRT = Region; } 359 360 RegionMRT *getRegionMRT() { return RMRT; } 361 362 void setParent(LinearizedRegion *P) { Parent = P; } 363 364 LinearizedRegion *getParent() { return Parent; } 365 366 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr); 367 368 void setBBSelectRegIn(unsigned Reg); 369 370 unsigned getBBSelectRegIn(); 371 372 void setBBSelectRegOut(unsigned Reg, bool IsLiveOut); 373 374 unsigned getBBSelectRegOut(); 375 376 void setHasLoop(bool Value); 377 378 bool getHasLoop(); 379 380 void addLiveOut(unsigned VReg); 381 382 void addLiveOuts(LinearizedRegion *LRegion); 383 384 void removeLiveOut(unsigned Reg); 385 386 void replaceLiveOut(unsigned OldReg, unsigned NewReg); 387 388 void replaceRegister(unsigned Register, unsigned NewRegister, 389 MachineRegisterInfo *MRI, bool ReplaceInside, 390 bool ReplaceOutside, bool IncludeLoopPHIs); 391 392 void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister, 393 bool IncludeLoopPHIs, 394 MachineRegisterInfo *MRI); 395 396 void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister, 397 bool IncludeLoopPHIs, 398 MachineRegisterInfo *MRI); 399 400 DenseSet<unsigned> *getLiveOuts(); 401 402 void setEntry(MachineBasicBlock *NewEntry); 403 404 MachineBasicBlock *getEntry(); 405 406 void setExit(MachineBasicBlock *NewExit); 407 408 MachineBasicBlock *getExit(); 409 410 void addMBB(MachineBasicBlock *MBB); 411 412 void addMBBs(LinearizedRegion *InnerRegion); 413 414 bool contains(MachineBasicBlock *MBB); 415 416 bool isLiveOut(unsigned Reg); 417 418 bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI); 419 420 bool isDefinedInRegion(unsigned Reg, MachineRegisterInfo *MRI); 421 422 void removeFalseRegisterKills(MachineRegisterInfo *MRI); 423 424 void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI, 425 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 426 427 LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 428 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 429 430 LinearizedRegion(RegionMRT *Region, const MachineRegisterInfo *MRI, 431 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 432 433 LinearizedRegion(); 434 435 ~LinearizedRegion(); 436 }; 437 438 class MRT { 439 protected: 440 RegionMRT *Parent; 441 unsigned BBSelectRegIn; 442 unsigned BBSelectRegOut; 443 444 public: 445 unsigned getBBSelectRegIn() { return BBSelectRegIn; } 446 447 unsigned getBBSelectRegOut() { return BBSelectRegOut; } 448 449 void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; } 450 451 void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; } 452 453 virtual RegionMRT *getRegionMRT() { return nullptr; } 454 455 virtual MBBMRT *getMBBMRT() { return nullptr; } 456 457 bool isRegion() { return getRegionMRT() != nullptr; } 458 459 bool isMBB() { return getMBBMRT() != nullptr; } 460 461 bool isRoot() { return Parent == nullptr; } 462 463 void setParent(RegionMRT *Region) { Parent = Region; } 464 465 RegionMRT *getParent() { return Parent; } 466 467 static MachineBasicBlock * 468 initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, 469 DenseMap<MachineRegion *, RegionMRT *> &RegionMap); 470 471 static RegionMRT *buildMRT(MachineFunction &MF, 472 const MachineRegionInfo *RegionInfo, 473 const SIInstrInfo *TII, 474 MachineRegisterInfo *MRI); 475 476 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0; 477 478 void dumpDepth(int depth) { 479 for (int i = depth; i > 0; --i) { 480 dbgs() << " "; 481 } 482 } 483 484 virtual ~MRT() {} 485 }; 486 487 class MBBMRT : public MRT { 488 MachineBasicBlock *MBB; 489 490 public: 491 virtual MBBMRT *getMBBMRT() { return this; } 492 493 MachineBasicBlock *getMBB() { return MBB; } 494 495 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { 496 dumpDepth(depth); 497 dbgs() << "MBB: " << getMBB()->getNumber(); 498 dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); 499 dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; 500 } 501 502 MBBMRT(MachineBasicBlock *BB) : MBB(BB) { 503 setParent(nullptr); 504 setBBSelectRegOut(0); 505 setBBSelectRegIn(0); 506 } 507 }; 508 509 class RegionMRT : public MRT { 510 protected: 511 MachineRegion *Region; 512 LinearizedRegion *LRegion; 513 MachineBasicBlock *Succ; 514 515 SetVector<MRT *> Children; 516 517 public: 518 virtual RegionMRT *getRegionMRT() { return this; } 519 520 void setLinearizedRegion(LinearizedRegion *LinearizeRegion) { 521 LRegion = LinearizeRegion; 522 } 523 524 LinearizedRegion *getLinearizedRegion() { return LRegion; } 525 526 MachineRegion *getMachineRegion() { return Region; } 527 528 unsigned getInnerOutputRegister() { 529 return (*(Children.begin()))->getBBSelectRegOut(); 530 } 531 532 void addChild(MRT *Tree) { Children.insert(Tree); } 533 534 SetVector<MRT *> *getChildren() { return &Children; } 535 536 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) { 537 dumpDepth(depth); 538 dbgs() << "Region: " << (void *)Region; 539 dbgs() << " In: " << PrintReg(getBBSelectRegIn(), TRI); 540 dbgs() << ", Out: " << PrintReg(getBBSelectRegOut(), TRI) << "\n"; 541 542 dumpDepth(depth); 543 if (getSucc()) 544 dbgs() << "Succ: " << getSucc()->getNumber() << "\n"; 545 else 546 dbgs() << "Succ: none \n"; 547 for (auto MRTI : Children) { 548 MRTI->dump(TRI, depth + 1); 549 } 550 } 551 552 MRT *getEntryTree() { return Children.back(); } 553 554 MRT *getExitTree() { return Children.front(); } 555 556 MachineBasicBlock *getEntry() { 557 MRT *Tree = Children.back(); 558 return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry() 559 : Tree->getMBBMRT()->getMBB(); 560 } 561 562 MachineBasicBlock *getExit() { 563 MRT *Tree = Children.front(); 564 return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit() 565 : Tree->getMBBMRT()->getMBB(); 566 } 567 568 void setSucc(MachineBasicBlock *MBB) { Succ = MBB; } 569 570 MachineBasicBlock *getSucc() { return Succ; } 571 572 bool contains(MachineBasicBlock *MBB) { 573 for (auto CI : Children) { 574 if (CI->isMBB()) { 575 if (MBB == CI->getMBBMRT()->getMBB()) { 576 return true; 577 } 578 } else { 579 if (CI->getRegionMRT()->contains(MBB)) { 580 return true; 581 } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr && 582 CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) { 583 return true; 584 } 585 } 586 } 587 return false; 588 } 589 590 void replaceLiveOutReg(unsigned Register, unsigned NewRegister) { 591 LinearizedRegion *LRegion = getLinearizedRegion(); 592 LRegion->replaceLiveOut(Register, NewRegister); 593 for (auto &CI : Children) { 594 if (CI->isRegion()) { 595 CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister); 596 } 597 } 598 } 599 600 RegionMRT(MachineRegion *MachineRegion) 601 : Region(MachineRegion), LRegion(nullptr), Succ(nullptr) { 602 setParent(nullptr); 603 setBBSelectRegOut(0); 604 setBBSelectRegIn(0); 605 } 606 607 virtual ~RegionMRT() { 608 if (LRegion) { 609 delete LRegion; 610 } 611 612 for (auto CI : Children) { 613 delete &(*CI); 614 } 615 } 616 }; 617 618 static unsigned createBBSelectReg(const SIInstrInfo *TII, 619 MachineRegisterInfo *MRI) { 620 return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32)); 621 } 622 623 MachineBasicBlock * 624 MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, 625 DenseMap<MachineRegion *, RegionMRT *> &RegionMap) { 626 for (auto &MFI : MF) { 627 MachineBasicBlock *ExitMBB = &MFI; 628 if (ExitMBB->succ_size() == 0) { 629 return ExitMBB; 630 } 631 } 632 llvm_unreachable("CFG has no exit block"); 633 return nullptr; 634 } 635 636 RegionMRT *MRT::buildMRT(MachineFunction &MF, 637 const MachineRegionInfo *RegionInfo, 638 const SIInstrInfo *TII, MachineRegisterInfo *MRI) { 639 SmallPtrSet<MachineRegion *, 4> PlacedRegions; 640 DenseMap<MachineRegion *, RegionMRT *> RegionMap; 641 MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion(); 642 RegionMRT *Result = new RegionMRT(TopLevelRegion); 643 RegionMap[TopLevelRegion] = Result; 644 645 // Insert the exit block first, we need it to be the merge node 646 // for the top level region. 647 MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap); 648 649 unsigned BBSelectRegIn = createBBSelectReg(TII, MRI); 650 MBBMRT *ExitMRT = new MBBMRT(Exit); 651 RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT); 652 ExitMRT->setBBSelectRegIn(BBSelectRegIn); 653 654 for (auto MBBI : post_order(&(MF.front()))) { 655 MachineBasicBlock *MBB = &(*MBBI); 656 657 // Skip Exit since we already added it 658 if (MBB == Exit) { 659 continue; 660 } 661 662 DEBUG(dbgs() << "Visiting BB#" << MBB->getNumber() << "\n"); 663 MBBMRT *NewMBB = new MBBMRT(MBB); 664 MachineRegion *Region = RegionInfo->getRegionFor(MBB); 665 666 // Ensure we have the MRT region 667 if (RegionMap.count(Region) == 0) { 668 RegionMRT *NewMRTRegion = new RegionMRT(Region); 669 RegionMap[Region] = NewMRTRegion; 670 671 // Ensure all parents are in the RegionMap 672 MachineRegion *Parent = Region->getParent(); 673 while (RegionMap.count(Parent) == 0) { 674 RegionMRT *NewMRTParent = new RegionMRT(Parent); 675 NewMRTParent->addChild(NewMRTRegion); 676 NewMRTRegion->setParent(NewMRTParent); 677 RegionMap[Parent] = NewMRTParent; 678 NewMRTRegion = NewMRTParent; 679 Parent = Parent->getParent(); 680 } 681 RegionMap[Parent]->addChild(NewMRTRegion); 682 NewMRTRegion->setParent(RegionMap[Parent]); 683 } 684 685 // Add MBB to Region MRT 686 RegionMap[Region]->addChild(NewMBB); 687 NewMBB->setParent(RegionMap[Region]); 688 RegionMap[Region]->setSucc(Region->getExit()); 689 } 690 return Result; 691 } 692 693 void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, unsigned Reg, 694 MachineInstr *DefInstr, 695 const MachineRegisterInfo *MRI, 696 const TargetRegisterInfo *TRI, 697 PHILinearize &PHIInfo) { 698 if (TRI->isVirtualRegister(Reg)) { 699 DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); 700 // If this is a source register to a PHI we are chaining, it 701 // must be live out. 702 if (PHIInfo.isSource(Reg)) { 703 DEBUG(dbgs() << "Add LiveOut (PHI): " << PrintReg(Reg, TRI) << "\n"); 704 addLiveOut(Reg); 705 } else { 706 // If this is live out of the MBB 707 for (auto &UI : MRI->use_operands(Reg)) { 708 if (UI.getParent()->getParent() != MBB) { 709 DEBUG(dbgs() << "Add LiveOut (MBB BB#" << MBB->getNumber() 710 << "): " << PrintReg(Reg, TRI) << "\n"); 711 addLiveOut(Reg); 712 } else { 713 // If the use is in the same MBB we have to make sure 714 // it is after the def, otherwise it is live out in a loop 715 MachineInstr *UseInstr = UI.getParent(); 716 for (MachineBasicBlock::instr_iterator 717 MII = UseInstr->getIterator(), 718 MIE = UseInstr->getParent()->instr_end(); 719 MII != MIE; ++MII) { 720 if ((&(*MII)) == DefInstr) { 721 DEBUG(dbgs() << "Add LiveOut (Loop): " << PrintReg(Reg, TRI) 722 << "\n"); 723 addLiveOut(Reg); 724 } 725 } 726 } 727 } 728 } 729 } 730 } 731 732 void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, unsigned Reg, 733 MachineInstr *DefInstr, 734 const MachineRegisterInfo *MRI, 735 const TargetRegisterInfo *TRI, 736 PHILinearize &PHIInfo) { 737 if (TRI->isVirtualRegister(Reg)) { 738 DEBUG(dbgs() << "Considering Register: " << PrintReg(Reg, TRI) << "\n"); 739 for (auto &UI : MRI->use_operands(Reg)) { 740 if (!Region->contains(UI.getParent()->getParent())) { 741 DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region 742 << "): " << PrintReg(Reg, TRI) << "\n"); 743 addLiveOut(Reg); 744 } 745 } 746 } 747 } 748 749 void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, 750 const MachineRegisterInfo *MRI, 751 const TargetRegisterInfo *TRI, 752 PHILinearize &PHIInfo) { 753 DEBUG(dbgs() << "-Store Live Outs Begin (BB#" << MBB->getNumber() << ")-\n"); 754 for (auto &II : *MBB) { 755 for (auto &RI : II.defs()) { 756 storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo); 757 } 758 for (auto &IRI : II.implicit_operands()) { 759 if (IRI.isDef()) { 760 storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo); 761 } 762 } 763 } 764 765 // If we have a successor with a PHI, source coming from this MBB we have to 766 // add the register as live out 767 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 768 E = MBB->succ_end(); 769 SI != E; ++SI) { 770 for (auto &II : *(*SI)) { 771 if (II.isPHI()) { 772 MachineInstr &PHI = II; 773 int numPreds = getPHINumInputs(PHI); 774 for (int i = 0; i < numPreds; ++i) { 775 if (getPHIPred(PHI, i) == MBB) { 776 unsigned PHIReg = getPHISourceReg(PHI, i); 777 DEBUG(dbgs() << "Add LiveOut (PhiSource BB#" << MBB->getNumber() 778 << " -> BB#" << (*SI)->getNumber() 779 << "): " << PrintReg(PHIReg, TRI) << "\n"); 780 addLiveOut(PHIReg); 781 } 782 } 783 } 784 } 785 } 786 787 DEBUG(dbgs() << "-Store Live Outs Endn-\n"); 788 } 789 790 void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB, 791 const MachineRegisterInfo *MRI, 792 const TargetRegisterInfo *TRI, 793 PHILinearize &PHIInfo, 794 RegionMRT *TopRegion) { 795 for (auto &II : *MBB) { 796 for (auto &RI : II.defs()) { 797 storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI, 798 PHIInfo); 799 } 800 for (auto &IRI : II.implicit_operands()) { 801 if (IRI.isDef()) { 802 storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI, 803 TRI, PHIInfo); 804 } 805 } 806 } 807 } 808 809 void LinearizedRegion::storeLiveOuts(RegionMRT *Region, 810 const MachineRegisterInfo *MRI, 811 const TargetRegisterInfo *TRI, 812 PHILinearize &PHIInfo, 813 RegionMRT *CurrentTopRegion) { 814 MachineBasicBlock *Exit = Region->getSucc(); 815 816 RegionMRT *TopRegion = 817 CurrentTopRegion == nullptr ? Region : CurrentTopRegion; 818 819 // Check if exit is end of function, if so, no live outs. 820 if (Exit == nullptr) 821 return; 822 823 auto Children = Region->getChildren(); 824 for (auto CI : *Children) { 825 if (CI->isMBB()) { 826 auto MBB = CI->getMBBMRT()->getMBB(); 827 storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion); 828 } else { 829 LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion(); 830 // We should be limited to only store registers that are live out from the 831 // lineaized region 832 for (auto MBBI : SubRegion->MBBs) { 833 storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion); 834 } 835 } 836 } 837 838 if (CurrentTopRegion == nullptr) { 839 auto Succ = Region->getSucc(); 840 for (auto &II : *Succ) { 841 if (II.isPHI()) { 842 MachineInstr &PHI = II; 843 int numPreds = getPHINumInputs(PHI); 844 for (int i = 0; i < numPreds; ++i) { 845 if (Region->contains(getPHIPred(PHI, i))) { 846 unsigned PHIReg = getPHISourceReg(PHI, i); 847 DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region 848 << "): " << PrintReg(PHIReg, TRI) << "\n"); 849 addLiveOut(PHIReg); 850 } 851 } 852 } 853 } 854 } 855 } 856 857 void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { 858 OS << "Linearized Region {"; 859 bool IsFirst = true; 860 for (const auto &MBB : MBBs) { 861 if (IsFirst) { 862 IsFirst = false; 863 } else { 864 OS << " ,"; 865 } 866 OS << MBB->getNumber(); 867 } 868 OS << "} (" << Entry->getNumber() << ", " 869 << (Exit == nullptr ? -1 : Exit->getNumber()) 870 << "): In:" << PrintReg(getBBSelectRegIn(), TRI) 871 << " Out:" << PrintReg(getBBSelectRegOut(), TRI) << " {"; 872 for (auto &LI : LiveOuts) { 873 OS << PrintReg(LI, TRI) << " "; 874 } 875 OS << "} \n"; 876 } 877 878 unsigned LinearizedRegion::getBBSelectRegIn() { 879 return getRegionMRT()->getBBSelectRegIn(); 880 } 881 882 unsigned LinearizedRegion::getBBSelectRegOut() { 883 return getRegionMRT()->getBBSelectRegOut(); 884 } 885 886 void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; } 887 888 bool LinearizedRegion::getHasLoop() { return HasLoop; } 889 890 void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); } 891 892 void LinearizedRegion::addLiveOuts(LinearizedRegion *LRegion) { 893 DenseSet<unsigned> *RegionLiveOuts = LRegion->getLiveOuts(); 894 for (auto R : *RegionLiveOuts) { 895 addLiveOut(R); 896 } 897 } 898 899 void LinearizedRegion::removeLiveOut(unsigned Reg) { 900 if (isLiveOut(Reg)) 901 LiveOuts.erase(Reg); 902 } 903 904 void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) { 905 if (isLiveOut(OldReg)) { 906 removeLiveOut(OldReg); 907 addLiveOut(NewReg); 908 } 909 } 910 911 void LinearizedRegion::replaceRegister(unsigned Register, unsigned NewRegister, 912 MachineRegisterInfo *MRI, 913 bool ReplaceInside, bool ReplaceOutside, 914 bool IncludeLoopPHI) { 915 assert(Register != NewRegister && "Cannot replace a reg with itself"); 916 917 DEBUG(dbgs() << "Pepareing to replace register (region): " 918 << PrintReg(Register, MRI->getTargetRegisterInfo()) << " with " 919 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); 920 921 // If we are replacing outside, we also need to update the LiveOuts 922 if (ReplaceOutside && 923 (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) { 924 LinearizedRegion *Current = this; 925 while (Current != nullptr && Current->getEntry() != nullptr) { 926 DEBUG(dbgs() << "Region before register replace\n"); 927 DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); 928 Current->replaceLiveOut(Register, NewRegister); 929 DEBUG(dbgs() << "Region after register replace\n"); 930 DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); 931 Current = Current->getParent(); 932 } 933 } 934 935 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), 936 E = MRI->reg_end(); 937 I != E;) { 938 MachineOperand &O = *I; 939 ++I; 940 941 // We don't rewrite defs. 942 if (O.isDef()) 943 continue; 944 945 bool IsInside = contains(O.getParent()->getParent()); 946 bool IsLoopPHI = IsInside && (O.getParent()->isPHI() && 947 O.getParent()->getParent() == getEntry()); 948 bool ShouldReplace = (IsInside && ReplaceInside) || 949 (!IsInside && ReplaceOutside) || 950 (IncludeLoopPHI && IsLoopPHI); 951 if (ShouldReplace) { 952 953 if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { 954 DEBUG(dbgs() << "Trying to substitute physical register: " 955 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 956 << "\n"); 957 llvm_unreachable("Cannot substitute physical registers"); 958 } else { 959 DEBUG(dbgs() << "Replacing register (region): " 960 << PrintReg(Register, MRI->getTargetRegisterInfo()) 961 << " with " 962 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 963 << "\n"); 964 O.setReg(NewRegister); 965 } 966 } 967 } 968 } 969 970 void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register, 971 unsigned NewRegister, 972 bool IncludeLoopPHIs, 973 MachineRegisterInfo *MRI) { 974 replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs); 975 } 976 977 void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register, 978 unsigned NewRegister, 979 bool IncludeLoopPHIs, 980 MachineRegisterInfo *MRI) { 981 replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs); 982 } 983 984 DenseSet<unsigned> *LinearizedRegion::getLiveOuts() { return &LiveOuts; } 985 986 void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) { 987 Entry = NewEntry; 988 } 989 990 MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; } 991 992 void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; } 993 994 MachineBasicBlock *LinearizedRegion::getExit() { return Exit; } 995 996 void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); } 997 998 void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) { 999 for (const auto &MBB : InnerRegion->MBBs) { 1000 addMBB(MBB); 1001 } 1002 } 1003 1004 bool LinearizedRegion::contains(MachineBasicBlock *MBB) { 1005 return MBBs.count(MBB) == 1; 1006 } 1007 1008 bool LinearizedRegion::isLiveOut(unsigned Reg) { 1009 return LiveOuts.count(Reg) == 1; 1010 } 1011 1012 bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) { 1013 return MRI->def_begin(Reg) == MRI->def_end(); 1014 } 1015 1016 bool LinearizedRegion::isDefinedInRegion(unsigned Reg, 1017 MachineRegisterInfo *MRI) { 1018 bool NoDef = hasNoDef(Reg, MRI); 1019 if (NoDef) { 1020 return false; 1021 } 1022 1023 if (!MRI->hasOneDef(Reg)) { 1024 DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1025 << " has multiple defs\n"); 1026 } 1027 1028 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); 1029 MachineOperand *Def = &(*(MRI->def_begin(Reg))); 1030 return contains(Def->getParent()->getParent()); 1031 } 1032 1033 // After the code has been structurized, what was flagged as kills 1034 // before are no longer register kills. 1035 void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { 1036 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); 1037 for (auto MBBI : MBBs) { 1038 MachineBasicBlock *MBB = MBBI; 1039 for (auto &II : *MBB) { 1040 for (auto &RI : II.uses()) { 1041 if (RI.isReg()) { 1042 unsigned Reg = RI.getReg(); 1043 if (TRI->isVirtualRegister(Reg)) { 1044 if (hasNoDef(Reg, MRI)) 1045 continue; 1046 if (!MRI->hasOneDef(Reg)) { 1047 DEBUG(this->getEntry()->getParent()->dump()); 1048 DEBUG(dbgs() << PrintReg(Reg, TRI) << "\n"); 1049 } 1050 1051 if (MRI->def_begin(Reg) == MRI->def_end()) { 1052 DEBUG(dbgs() << "Register " 1053 << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1054 << " has NO defs\n"); 1055 } else if (!MRI->hasOneDef(Reg)) { 1056 DEBUG(dbgs() << "Register " 1057 << PrintReg(Reg, MRI->getTargetRegisterInfo()) 1058 << " has multiple defs\n"); 1059 } 1060 1061 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); 1062 MachineOperand *Def = &(*(MRI->def_begin(Reg))); 1063 MachineOperand *UseOperand = &(RI); 1064 bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB; 1065 if (UseIsOutsideDefMBB && UseOperand->isKill()) { 1066 DEBUG(dbgs() << "Removing kill flag on register: " 1067 << PrintReg(Reg, TRI) << "\n"); 1068 UseOperand->setIsKill(false); 1069 } 1070 } 1071 } 1072 } 1073 } 1074 } 1075 } 1076 1077 void LinearizedRegion::initLiveOut(RegionMRT *Region, 1078 const MachineRegisterInfo *MRI, 1079 const TargetRegisterInfo *TRI, 1080 PHILinearize &PHIInfo) { 1081 storeLiveOuts(Region, MRI, TRI, PHIInfo); 1082 } 1083 1084 LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB, 1085 const MachineRegisterInfo *MRI, 1086 const TargetRegisterInfo *TRI, 1087 PHILinearize &PHIInfo) { 1088 setEntry(MBB); 1089 setExit(MBB); 1090 storeLiveOuts(MBB, MRI, TRI, PHIInfo); 1091 MBBs.insert(MBB); 1092 Parent = nullptr; 1093 } 1094 1095 LinearizedRegion::LinearizedRegion(RegionMRT *Region, 1096 const MachineRegisterInfo *MRI, 1097 const TargetRegisterInfo *TRI, 1098 PHILinearize &PHIInfo) { 1099 setEntry(Region->getEntry()); 1100 // We don't have a single exit block that is part of the region 1101 // at this point. When the transform is performed this block 1102 // will be created. 1103 setExit(nullptr); 1104 storeLiveOuts(Region, MRI, TRI, PHIInfo); 1105 Parent = nullptr; 1106 } 1107 1108 LinearizedRegion::LinearizedRegion() { 1109 setEntry(nullptr); 1110 setExit(nullptr); 1111 Parent = nullptr; 1112 } 1113 1114 LinearizedRegion::~LinearizedRegion() {} 1115 1116 class AMDGPUMachineCFGStructurizer : public MachineFunctionPass { 1117 private: 1118 const MachineRegionInfo *Regions; 1119 const SIInstrInfo *TII; 1120 const TargetRegisterInfo *TRI; 1121 MachineRegisterInfo *MRI; 1122 unsigned BBSelectRegister; 1123 PHILinearize PHIInfo; 1124 DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap; 1125 1126 void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI, 1127 SmallVector<unsigned, 2> &RegionIndices); 1128 void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, 1129 SmallVector<unsigned, 2> &RegionIndices); 1130 void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, 1131 SmallVector<unsigned, 2> &PHINonRegionIndices); 1132 1133 void storePHILinearizationInfoDest( 1134 unsigned LDestReg, MachineInstr &PHI, 1135 SmallVector<unsigned, 2> *RegionIndices = nullptr); 1136 1137 unsigned storePHILinearizationInfo(MachineInstr &PHI, 1138 SmallVector<unsigned, 2> *RegionIndices); 1139 1140 void extractKilledPHIs(MachineBasicBlock *MBB); 1141 void extractKilledPHIs(LinearizedRegion *LRegion); 1142 1143 bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices, 1144 unsigned *ReplaceReg); 1145 1146 bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1147 MachineBasicBlock *SourceMBB, 1148 SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg); 1149 1150 void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1151 MachineBasicBlock *LastMerge, 1152 SmallVector<unsigned, 2> &PHIRegionIndices); 1153 void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1154 MachineBasicBlock *IfMBB, 1155 SmallVector<unsigned, 2> &PHIRegionIndices); 1156 void replaceLiveOutRegs(MachineInstr &PHI, 1157 SmallVector<unsigned, 2> &PHIRegionIndices, 1158 unsigned CombinedSourceReg, 1159 LinearizedRegion *LRegion); 1160 void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge, 1161 MachineInstr &PHI, LinearizedRegion *LRegion); 1162 1163 void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge, 1164 LinearizedRegion *LRegion); 1165 void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB, 1166 MachineInstr &PHI); 1167 void rewriteRegionEntryPHIs(LinearizedRegion *Region, 1168 MachineBasicBlock *IfMBB); 1169 1170 bool regionIsSimpleIf(RegionMRT *Region); 1171 1172 void transformSimpleIfRegion(RegionMRT *Region); 1173 1174 bool regionIsSimpleLoop(RegionMRT *Region); 1175 1176 void transformSimpleLoopRegion(RegionMRT *Region); 1177 1178 void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II); 1179 1180 void insertUnconditionalBranch(MachineBasicBlock *MBB, 1181 MachineBasicBlock *Dest, 1182 const DebugLoc &DL = DebugLoc()); 1183 1184 MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region); 1185 1186 void replaceRegisterOutsideMBB(MachineBasicBlock *MBB, unsigned Register, 1187 unsigned NewRegister); 1188 1189 void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1190 MachineBasicBlock *MergeBB, unsigned DestRegister, 1191 unsigned IfSourceRegister, unsigned CodeSourceRegister, 1192 bool IsUndefIfSource = false); 1193 1194 MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB, 1195 MachineBasicBlock *CodeBBStart, 1196 MachineBasicBlock *CodeBBEnd, 1197 MachineBasicBlock *SelectBB, unsigned IfReg, 1198 bool InheritPreds); 1199 1200 void prunePHIInfo(MachineBasicBlock *MBB); 1201 void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg); 1202 1203 void createEntryPHIs(LinearizedRegion *CurrentRegion); 1204 void resolvePHIInfos(MachineBasicBlock *FunctionEntry); 1205 1206 void replaceRegisterWith(unsigned Register, unsigned NewRegister); 1207 1208 MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB, 1209 MachineBasicBlock *CodeBB, 1210 LinearizedRegion *LRegion, 1211 unsigned BBSelectRegIn, 1212 unsigned BBSelectRegOut); 1213 1214 MachineBasicBlock * 1215 createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion, 1216 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, 1217 unsigned BBSelectRegIn, unsigned BBSelectRegOut); 1218 void ensureCondIsNotKilled(SmallVector<MachineOperand, 1> Cond); 1219 1220 void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, 1221 MachineBasicBlock *MergeBB, 1222 unsigned BBSelectReg); 1223 1224 MachineInstr *getDefInstr(unsigned Reg); 1225 void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1226 MachineBasicBlock *MergeBB, 1227 LinearizedRegion *InnerRegion, unsigned DestReg, 1228 unsigned SourceReg); 1229 bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion, 1230 unsigned Register); 1231 void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1232 MachineBasicBlock *MergeBB, 1233 LinearizedRegion *InnerRegion, 1234 LinearizedRegion *LRegion); 1235 1236 void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry, 1237 MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion); 1238 void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc, 1239 LinearizedRegion *LRegion); 1240 1241 MachineBasicBlock *splitExit(LinearizedRegion *LRegion); 1242 1243 MachineBasicBlock *splitEntry(LinearizedRegion *LRegion); 1244 1245 LinearizedRegion *initLinearizedRegion(RegionMRT *Region); 1246 1247 bool structurizeComplexRegion(RegionMRT *Region); 1248 1249 bool structurizeRegion(RegionMRT *Region); 1250 1251 bool structurizeRegions(RegionMRT *Region, bool isTopRegion); 1252 1253 public: 1254 static char ID; 1255 1256 void getAnalysisUsage(AnalysisUsage &AU) const override { 1257 AU.addRequired<MachineRegionInfoPass>(); 1258 MachineFunctionPass::getAnalysisUsage(AU); 1259 } 1260 1261 AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) { 1262 initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); 1263 } 1264 1265 void initFallthroughMap(MachineFunction &MF); 1266 1267 void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut); 1268 1269 unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg, 1270 MachineRegisterInfo *MRI, 1271 const SIInstrInfo *TII); 1272 1273 RegionMRT *RMRT; 1274 void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; } 1275 1276 RegionMRT *getRegionMRT() { return RMRT; } 1277 1278 bool runOnMachineFunction(MachineFunction &MF) override; 1279 }; 1280 } 1281 1282 char AMDGPUMachineCFGStructurizer::ID = 0; 1283 1284 bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) { 1285 MachineBasicBlock *Entry = Region->getEntry(); 1286 MachineBasicBlock *Succ = Region->getSucc(); 1287 bool FoundBypass = false; 1288 bool FoundIf = false; 1289 1290 if (Entry->succ_size() != 2) { 1291 return false; 1292 } 1293 1294 for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), 1295 E = Entry->succ_end(); 1296 SI != E; ++SI) { 1297 MachineBasicBlock *Current = *SI; 1298 1299 if (Current == Succ) { 1300 FoundBypass = true; 1301 } else if ((Current->succ_size() == 1) && 1302 *(Current->succ_begin()) == Succ) { 1303 FoundIf = true; 1304 } 1305 } 1306 1307 return FoundIf && FoundBypass; 1308 } 1309 1310 void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) { 1311 MachineBasicBlock *Entry = Region->getEntry(); 1312 MachineBasicBlock *Exit = Region->getExit(); 1313 TII->convertNonUniformIfRegion(Entry, Exit); 1314 } 1315 1316 bool AMDGPUMachineCFGStructurizer::regionIsSimpleLoop(RegionMRT *Region) { 1317 MachineBasicBlock *Entry = Region->getEntry(); 1318 1319 if (Entry->succ_size() != 1) { 1320 return false; 1321 } 1322 1323 int NumRegionExitEdges = 0; 1324 MachineBasicBlock *BackBlock = nullptr; 1325 for (MachineBasicBlock::const_pred_iterator PI = Entry->succ_begin(), 1326 PE = Entry->succ_end(); 1327 PI != PE; ++PI) { 1328 MachineBasicBlock *CurrentSP = *PI; 1329 if (Region->contains(CurrentSP)) { 1330 BackBlock = CurrentSP; 1331 } 1332 } 1333 1334 bool HasBackedge = false; 1335 1336 for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), 1337 SE = Entry->succ_end(); 1338 SI != SE; ++SI) { 1339 MachineBasicBlock *CurrentBS = *SI; 1340 if (CurrentBS == Entry) { 1341 HasBackedge = true; 1342 } 1343 } 1344 1345 return NumRegionExitEdges == 1 && BackBlock->succ_size() == 2 && HasBackedge; 1346 } 1347 1348 void AMDGPUMachineCFGStructurizer::transformSimpleLoopRegion(RegionMRT *Region) { 1349 MachineBasicBlock *Entry = Region->getEntry(); 1350 MachineBasicBlock *BackBlock = nullptr; 1351 for (MachineBasicBlock::const_pred_iterator PI = Entry->succ_begin(), 1352 PE = Entry->succ_end(); 1353 PI != PE; ++PI) { 1354 MachineBasicBlock *CurrentSP = *PI; 1355 if (Region->contains(CurrentSP)) { 1356 BackBlock = CurrentSP; 1357 } 1358 } 1359 1360 TII->convertNonUniformLoopRegion(Entry, BackBlock); 1361 } 1362 1363 static void fixMBBTerminator(MachineBasicBlock *MBB) { 1364 1365 if (MBB->succ_size() == 1) { 1366 auto *Succ = *(MBB->succ_begin()); 1367 for (auto &TI : MBB->terminators()) { 1368 for (auto &UI : TI.uses()) { 1369 if (UI.isMBB() && UI.getMBB() != Succ) { 1370 UI.setMBB(Succ); 1371 } 1372 } 1373 } 1374 } 1375 } 1376 1377 static void fixRegionTerminator(RegionMRT *Region) { 1378 MachineBasicBlock *InternalSucc = nullptr; 1379 MachineBasicBlock *ExternalSucc = nullptr; 1380 LinearizedRegion *LRegion = Region->getLinearizedRegion(); 1381 auto Exit = LRegion->getExit(); 1382 1383 SmallPtrSet<MachineBasicBlock *, 2> Successors; 1384 for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(), 1385 SE = Exit->succ_end(); 1386 SI != SE; ++SI) { 1387 MachineBasicBlock *Succ = *SI; 1388 if (LRegion->contains(Succ)) { 1389 // Do not allow re-assign 1390 assert(InternalSucc == nullptr); 1391 InternalSucc = Succ; 1392 } else { 1393 // Do not allow re-assign 1394 assert(ExternalSucc == nullptr); 1395 ExternalSucc = Succ; 1396 } 1397 } 1398 1399 for (auto &TI : Exit->terminators()) { 1400 for (auto &UI : TI.uses()) { 1401 if (UI.isMBB()) { 1402 auto Target = UI.getMBB(); 1403 if (Target != InternalSucc && Target != ExternalSucc) { 1404 UI.setMBB(ExternalSucc); 1405 } 1406 } 1407 } 1408 } 1409 } 1410 1411 // If a region region is just a sequence of regions (and the exit 1412 // block in the case of the top level region), we can simply skip 1413 // linearizing it, because it is already linear 1414 bool regionIsSequence(RegionMRT *Region) { 1415 auto Children = Region->getChildren(); 1416 for (auto CI : *Children) { 1417 if (!CI->isRegion()) { 1418 if (CI->getMBBMRT()->getMBB()->succ_size() > 1) { 1419 return false; 1420 } 1421 } 1422 } 1423 return true; 1424 } 1425 1426 void fixupRegionExits(RegionMRT *Region) { 1427 auto Children = Region->getChildren(); 1428 for (auto CI : *Children) { 1429 if (!CI->isRegion()) { 1430 fixMBBTerminator(CI->getMBBMRT()->getMBB()); 1431 } else { 1432 fixRegionTerminator(CI->getRegionMRT()); 1433 } 1434 } 1435 } 1436 1437 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( 1438 RegionMRT *Region, MachineInstr &PHI, 1439 SmallVector<unsigned, 2> &PHIRegionIndices) { 1440 unsigned NumInputs = getPHINumInputs(PHI); 1441 for (unsigned i = 0; i < NumInputs; ++i) { 1442 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1443 if (Region->contains(Pred)) { 1444 PHIRegionIndices.push_back(i); 1445 } 1446 } 1447 } 1448 1449 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( 1450 LinearizedRegion *Region, MachineInstr &PHI, 1451 SmallVector<unsigned, 2> &PHIRegionIndices) { 1452 unsigned NumInputs = getPHINumInputs(PHI); 1453 for (unsigned i = 0; i < NumInputs; ++i) { 1454 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1455 if (Region->contains(Pred)) { 1456 PHIRegionIndices.push_back(i); 1457 } 1458 } 1459 } 1460 1461 void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices( 1462 LinearizedRegion *Region, MachineInstr &PHI, 1463 SmallVector<unsigned, 2> &PHINonRegionIndices) { 1464 unsigned NumInputs = getPHINumInputs(PHI); 1465 for (unsigned i = 0; i < NumInputs; ++i) { 1466 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1467 if (!Region->contains(Pred)) { 1468 PHINonRegionIndices.push_back(i); 1469 } 1470 } 1471 } 1472 1473 void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest( 1474 unsigned LDestReg, MachineInstr &PHI, 1475 SmallVector<unsigned, 2> *RegionIndices) { 1476 if (RegionIndices) { 1477 for (auto i : *RegionIndices) { 1478 PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); 1479 } 1480 } else { 1481 unsigned NumInputs = getPHINumInputs(PHI); 1482 for (unsigned i = 0; i < NumInputs; ++i) { 1483 PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); 1484 } 1485 } 1486 } 1487 1488 unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo( 1489 MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) { 1490 unsigned DestReg = getPHIDestReg(PHI); 1491 unsigned LinearizeDestReg = 1492 MRI->createVirtualRegister(MRI->getRegClass(DestReg)); 1493 PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc()); 1494 storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices); 1495 return LinearizeDestReg; 1496 } 1497 1498 void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) { 1499 // We need to create a new chain for the killed phi, but there is no 1500 // need to do the renaming outside or inside the block. 1501 SmallPtrSet<MachineInstr *, 2> PHIs; 1502 for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), 1503 E = MBB->instr_end(); 1504 I != E; ++I) { 1505 MachineInstr &Instr = *I; 1506 if (Instr.isPHI()) { 1507 unsigned PHIDestReg = getPHIDestReg(Instr); 1508 DEBUG(dbgs() << "Extractking killed phi:\n"); 1509 DEBUG(Instr.dump()); 1510 PHIs.insert(&Instr); 1511 PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc()); 1512 storePHILinearizationInfoDest(PHIDestReg, Instr); 1513 } 1514 } 1515 1516 for (auto PI : PHIs) { 1517 PI->eraseFromParent(); 1518 } 1519 } 1520 1521 void AMDGPUMachineCFGStructurizer::extractKilledPHIs(LinearizedRegion *LRegion) { 1522 // PHIs can only exist in the entry block. 1523 extractKilledPHIs(LRegion->getEntry()); 1524 } 1525 1526 static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices, 1527 unsigned Index) { 1528 for (auto i : PHIRegionIndices) { 1529 if (i == Index) 1530 return true; 1531 } 1532 return false; 1533 } 1534 1535 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, 1536 SmallVector<unsigned, 2> &PHIIndices, 1537 unsigned *ReplaceReg) { 1538 return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg); 1539 } 1540 1541 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, 1542 unsigned CombinedSourceReg, 1543 MachineBasicBlock *SourceMBB, 1544 SmallVector<unsigned, 2> &PHIIndices, 1545 unsigned *ReplaceReg) { 1546 DEBUG(dbgs() << "Shrink PHI: "); 1547 DEBUG(PHI.dump()); 1548 DEBUG(dbgs() << " to " << PrintReg(getPHIDestReg(PHI), TRI) 1549 << "<def> = PHI("); 1550 1551 bool Replaced = false; 1552 unsigned NumInputs = getPHINumInputs(PHI); 1553 int SingleExternalEntryIndex = -1; 1554 for (unsigned i = 0; i < NumInputs; ++i) { 1555 if (!isPHIRegionIndex(PHIIndices, i)) { 1556 if (SingleExternalEntryIndex == -1) { 1557 // Single entry 1558 SingleExternalEntryIndex = i; 1559 } else { 1560 // Multiple entries 1561 SingleExternalEntryIndex = -2; 1562 } 1563 } 1564 } 1565 1566 if (SingleExternalEntryIndex > -1) { 1567 *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex); 1568 // We should not rewrite the code, we should only pick up the single value 1569 // that represents the shrunk PHI. 1570 Replaced = true; 1571 } else { 1572 MachineBasicBlock *MBB = PHI.getParent(); 1573 MachineInstrBuilder MIB = 1574 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1575 getPHIDestReg(PHI)); 1576 if (SourceMBB) { 1577 MIB.addReg(CombinedSourceReg); 1578 MIB.addMBB(SourceMBB); 1579 DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" 1580 << SourceMBB->getNumber()); 1581 } 1582 1583 for (unsigned i = 0; i < NumInputs; ++i) { 1584 if (isPHIRegionIndex(PHIIndices, i)) { 1585 continue; 1586 } 1587 unsigned SourceReg = getPHISourceReg(PHI, i); 1588 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1589 MIB.addReg(SourceReg); 1590 MIB.addMBB(SourcePred); 1591 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 1592 << SourcePred->getNumber()); 1593 } 1594 DEBUG(dbgs() << ")\n"); 1595 } 1596 PHI.eraseFromParent(); 1597 return Replaced; 1598 } 1599 1600 void AMDGPUMachineCFGStructurizer::replacePHI( 1601 MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge, 1602 SmallVector<unsigned, 2> &PHIRegionIndices) { 1603 DEBUG(dbgs() << "Replace PHI: "); 1604 DEBUG(PHI.dump()); 1605 DEBUG(dbgs() << " with " << PrintReg(getPHIDestReg(PHI), TRI) 1606 << "<def> = PHI("); 1607 1608 bool HasExternalEdge = false; 1609 unsigned NumInputs = getPHINumInputs(PHI); 1610 for (unsigned i = 0; i < NumInputs; ++i) { 1611 if (!isPHIRegionIndex(PHIRegionIndices, i)) { 1612 HasExternalEdge = true; 1613 } 1614 } 1615 1616 if (HasExternalEdge) { 1617 MachineBasicBlock *MBB = PHI.getParent(); 1618 MachineInstrBuilder MIB = 1619 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1620 getPHIDestReg(PHI)); 1621 MIB.addReg(CombinedSourceReg); 1622 MIB.addMBB(LastMerge); 1623 DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" 1624 << LastMerge->getNumber()); 1625 for (unsigned i = 0; i < NumInputs; ++i) { 1626 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1627 continue; 1628 } 1629 unsigned SourceReg = getPHISourceReg(PHI, i); 1630 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1631 MIB.addReg(SourceReg); 1632 MIB.addMBB(SourcePred); 1633 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 1634 << SourcePred->getNumber()); 1635 } 1636 DEBUG(dbgs() << ")\n"); 1637 } else { 1638 replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg); 1639 } 1640 PHI.eraseFromParent(); 1641 } 1642 1643 void AMDGPUMachineCFGStructurizer::replaceEntryPHI( 1644 MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB, 1645 SmallVector<unsigned, 2> &PHIRegionIndices) { 1646 1647 DEBUG(dbgs() << "Replace entry PHI: "); 1648 DEBUG(PHI.dump()); 1649 DEBUG(dbgs() << " with "); 1650 1651 unsigned NumInputs = getPHINumInputs(PHI); 1652 unsigned NumNonRegionInputs = NumInputs; 1653 for (unsigned i = 0; i < NumInputs; ++i) { 1654 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1655 NumNonRegionInputs--; 1656 } 1657 } 1658 1659 if (NumNonRegionInputs == 0) { 1660 auto DestReg = getPHIDestReg(PHI); 1661 replaceRegisterWith(DestReg, CombinedSourceReg); 1662 DEBUG(dbgs() << " register " << PrintReg(CombinedSourceReg, TRI) << "\n"); 1663 PHI.eraseFromParent(); 1664 } else { 1665 DEBUG(dbgs() << PrintReg(getPHIDestReg(PHI), TRI) << "<def> = PHI("); 1666 MachineBasicBlock *MBB = PHI.getParent(); 1667 MachineInstrBuilder MIB = 1668 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1669 getPHIDestReg(PHI)); 1670 MIB.addReg(CombinedSourceReg); 1671 MIB.addMBB(IfMBB); 1672 DEBUG(dbgs() << PrintReg(CombinedSourceReg, TRI) << ", BB#" 1673 << IfMBB->getNumber()); 1674 unsigned NumInputs = getPHINumInputs(PHI); 1675 for (unsigned i = 0; i < NumInputs; ++i) { 1676 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1677 continue; 1678 } 1679 unsigned SourceReg = getPHISourceReg(PHI, i); 1680 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1681 MIB.addReg(SourceReg); 1682 MIB.addMBB(SourcePred); 1683 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 1684 << SourcePred->getNumber()); 1685 } 1686 DEBUG(dbgs() << ")\n"); 1687 PHI.eraseFromParent(); 1688 } 1689 } 1690 1691 void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs( 1692 MachineInstr &PHI, SmallVector<unsigned, 2> &PHIRegionIndices, 1693 unsigned CombinedSourceReg, LinearizedRegion *LRegion) { 1694 bool WasLiveOut = false; 1695 for (auto PII : PHIRegionIndices) { 1696 unsigned Reg = getPHISourceReg(PHI, PII); 1697 if (LRegion->isLiveOut(Reg)) { 1698 bool IsDead = true; 1699 1700 // Check if register is live out of the basic block 1701 MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent(); 1702 for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) { 1703 if ((*UI).getParent()->getParent() != DefMBB) { 1704 IsDead = false; 1705 } 1706 } 1707 1708 DEBUG(dbgs() << "Register " << PrintReg(Reg, TRI) << " is " 1709 << (IsDead ? "dead" : "alive") << " after PHI replace\n"); 1710 if (IsDead) { 1711 LRegion->removeLiveOut(Reg); 1712 } 1713 WasLiveOut = true; 1714 } 1715 } 1716 1717 if (WasLiveOut) 1718 LRegion->addLiveOut(CombinedSourceReg); 1719 } 1720 1721 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region, 1722 MachineBasicBlock *LastMerge, 1723 MachineInstr &PHI, 1724 LinearizedRegion *LRegion) { 1725 SmallVector<unsigned, 2> PHIRegionIndices; 1726 getPHIRegionIndices(Region, PHI, PHIRegionIndices); 1727 unsigned LinearizedSourceReg = 1728 storePHILinearizationInfo(PHI, &PHIRegionIndices); 1729 1730 replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices); 1731 replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion); 1732 } 1733 1734 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region, 1735 MachineBasicBlock *IfMBB, 1736 MachineInstr &PHI) { 1737 SmallVector<unsigned, 2> PHINonRegionIndices; 1738 getPHINonRegionIndices(Region, PHI, PHINonRegionIndices); 1739 unsigned LinearizedSourceReg = 1740 storePHILinearizationInfo(PHI, &PHINonRegionIndices); 1741 replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices); 1742 } 1743 1744 static void collectPHIs(MachineBasicBlock *MBB, 1745 SmallVector<MachineInstr *, 2> &PHIs) { 1746 for (auto &BBI : *MBB) { 1747 if (BBI.isPHI()) { 1748 PHIs.push_back(&BBI); 1749 } 1750 } 1751 } 1752 1753 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region, 1754 MachineBasicBlock *LastMerge, 1755 LinearizedRegion *LRegion) { 1756 SmallVector<MachineInstr *, 2> PHIs; 1757 auto Exit = Region->getSucc(); 1758 if (Exit == nullptr) 1759 return; 1760 1761 collectPHIs(Exit, PHIs); 1762 1763 for (auto PHII : PHIs) { 1764 rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion); 1765 } 1766 } 1767 1768 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region, 1769 MachineBasicBlock *IfMBB) { 1770 SmallVector<MachineInstr *, 2> PHIs; 1771 auto Entry = Region->getEntry(); 1772 1773 collectPHIs(Entry, PHIs); 1774 1775 for (auto PHII : PHIs) { 1776 rewriteRegionEntryPHI(Region, IfMBB, *PHII); 1777 } 1778 } 1779 1780 void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB, 1781 MachineBasicBlock *Dest, 1782 const DebugLoc &DL) { 1783 DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber() 1784 << " -> " << Dest->getNumber() << "\n"); 1785 MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator(); 1786 bool HasTerminator = Terminator != MBB->instr_end(); 1787 if (HasTerminator) { 1788 TII->ReplaceTailWithBranchTo(Terminator, Dest); 1789 } 1790 if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) { 1791 TII->insertUnconditionalBranch(*MBB, Dest, DL); 1792 } 1793 } 1794 1795 static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) { 1796 MachineBasicBlock *result = nullptr; 1797 for (auto &MFI : MF) { 1798 if (MFI.succ_size() == 0) { 1799 if (result == nullptr) { 1800 result = &MFI; 1801 } else { 1802 return nullptr; 1803 } 1804 } 1805 } 1806 1807 return result; 1808 } 1809 1810 static bool hasOneExitNode(MachineFunction &MF) { 1811 return getSingleExitNode(MF) != nullptr; 1812 } 1813 1814 MachineBasicBlock * 1815 AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) { 1816 auto Exit = Region->getSucc(); 1817 1818 // If the exit is the end of the function, we just use the existing 1819 MachineFunction *MF = Region->getEntry()->getParent(); 1820 if (Exit == nullptr && hasOneExitNode(*MF)) { 1821 return &(*(--(Region->getEntry()->getParent()->end()))); 1822 } 1823 1824 MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock(); 1825 if (Exit == nullptr) { 1826 MachineFunction::iterator ExitIter = MF->end(); 1827 MF->insert(ExitIter, LastMerge); 1828 } else { 1829 MachineFunction::iterator ExitIter = Exit->getIterator(); 1830 MF->insert(ExitIter, LastMerge); 1831 LastMerge->addSuccessor(Exit); 1832 insertUnconditionalBranch(LastMerge, Exit); 1833 DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() << "\n"); 1834 } 1835 return LastMerge; 1836 } 1837 1838 void AMDGPUMachineCFGStructurizer::replaceRegisterOutsideMBB(MachineBasicBlock *MBB, 1839 unsigned Register, 1840 unsigned NewRegister) { 1841 assert(Register != NewRegister && "Cannot replace a reg with itself"); 1842 1843 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), 1844 E = MRI->reg_end(); 1845 I != E;) { 1846 MachineOperand &O = *I; 1847 ++I; 1848 if (O.getParent()->getParent() != MBB) { 1849 if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { 1850 llvm_unreachable("Cannot substitute physical registers"); 1851 // We don't handle physical registers, but if we need to in the future 1852 // This is how we do it: O.substPhysReg(NewRegister, *TRI); 1853 } else { 1854 O.setReg(NewRegister); 1855 } 1856 } 1857 } 1858 } 1859 1860 void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB, 1861 MachineBasicBlock *CodeBB, 1862 MachineBasicBlock *MergeBB, 1863 unsigned DestRegister, 1864 unsigned IfSourceRegister, 1865 unsigned CodeSourceRegister, 1866 bool IsUndefIfSource) { 1867 // If this is the function exit block, we don't need a phi. 1868 if (MergeBB->succ_begin() == MergeBB->succ_end()) { 1869 return; 1870 } 1871 DEBUG(dbgs() << "Merge PHI (BB#" << MergeBB->getNumber() 1872 << "): " << PrintReg(DestRegister, TRI) << "<def> = PHI(" 1873 << PrintReg(IfSourceRegister, TRI) << ", BB#" 1874 << IfBB->getNumber() << PrintReg(CodeSourceRegister, TRI) 1875 << ", BB#" << CodeBB->getNumber() << ")\n"); 1876 const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin()); 1877 MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL, 1878 TII->get(TargetOpcode::PHI), DestRegister); 1879 if (IsUndefIfSource && false) { 1880 MIB.addReg(IfSourceRegister, RegState::Undef); 1881 } else { 1882 MIB.addReg(IfSourceRegister); 1883 } 1884 MIB.addMBB(IfBB); 1885 MIB.addReg(CodeSourceRegister); 1886 MIB.addMBB(CodeBB); 1887 } 1888 1889 static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) { 1890 for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), 1891 E = MBB->succ_end(); 1892 PI != E; ++PI) { 1893 if ((*PI) != MBB) { 1894 (MBB)->removeSuccessor(*PI); 1895 } 1896 } 1897 } 1898 1899 static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, 1900 MachineBasicBlock *EndMBB) { 1901 1902 // We have to check against the StartMBB successor becasuse a 1903 // structurized region with a loop will have the entry block split, 1904 // and the backedge will go to the entry successor. 1905 DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs; 1906 unsigned SuccSize = StartMBB->succ_size(); 1907 if (SuccSize > 0) { 1908 MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin()); 1909 for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(), 1910 E = EndMBB->succ_end(); 1911 PI != E; ++PI) { 1912 // Either we have a back-edge to the entry block, or a back-edge to the 1913 // succesor of the entry block since the block may be split. 1914 if ((*PI) != StartMBB && 1915 !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) { 1916 Succs.insert( 1917 std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI)); 1918 } 1919 } 1920 } 1921 1922 for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(), 1923 E = StartMBB->pred_end(); 1924 PI != E; ++PI) { 1925 if ((*PI) != EndMBB) { 1926 Succs.insert( 1927 std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB)); 1928 } 1929 } 1930 1931 for (auto SI : Succs) { 1932 std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI; 1933 DEBUG(dbgs() << "Removing edge: BB#" << Edge.first->getNumber() << " -> BB#" 1934 << Edge.second->getNumber() << "\n"); 1935 Edge.first->removeSuccessor(Edge.second); 1936 } 1937 } 1938 1939 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock( 1940 MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart, 1941 MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg, 1942 bool InheritPreds) { 1943 MachineFunction *MF = MergeBB->getParent(); 1944 MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock(); 1945 1946 if (InheritPreds) { 1947 for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(), 1948 E = CodeBBStart->pred_end(); 1949 PI != E; ++PI) { 1950 if ((*PI) != CodeBBEnd) { 1951 MachineBasicBlock *Pred = (*PI); 1952 Pred->addSuccessor(IfBB); 1953 } 1954 } 1955 } 1956 1957 removeExternalCFGEdges(CodeBBStart, CodeBBEnd); 1958 1959 auto CodeBBStartI = CodeBBStart->getIterator(); 1960 auto CodeBBEndI = CodeBBEnd->getIterator(); 1961 auto MergeIter = MergeBB->getIterator(); 1962 MF->insert(MergeIter, IfBB); 1963 MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI); 1964 IfBB->addSuccessor(MergeBB); 1965 IfBB->addSuccessor(CodeBBStart); 1966 1967 DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n"); 1968 // Ensure that the MergeBB is a succesor of the CodeEndBB. 1969 if (!CodeBBEnd->isSuccessor(MergeBB)) 1970 CodeBBEnd->addSuccessor(MergeBB); 1971 1972 DEBUG(dbgs() << "Moved MBB#" << CodeBBStart->getNumber() << " through MBB#" 1973 << CodeBBEnd->getNumber() << "\n"); 1974 1975 // If we have a single predecessor we can find a reasonable debug location 1976 MachineBasicBlock *SinglePred = 1977 CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr; 1978 const DebugLoc &DL = SinglePred 1979 ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator()) 1980 : DebugLoc(); 1981 1982 unsigned Reg = 1983 TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg, 1984 SelectBB->getNumber() /* CodeBBStart->getNumber() */); 1985 if (&(*(IfBB->getParent()->begin())) == IfBB) { 1986 TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg, 1987 CodeBBStart->getNumber()); 1988 } 1989 MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); 1990 ArrayRef<MachineOperand> Cond(RegOp); 1991 TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL); 1992 1993 return IfBB; 1994 } 1995 1996 void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled( 1997 SmallVector<MachineOperand, 1> Cond) { 1998 if (Cond.size() != 1) 1999 return; 2000 if (!Cond[0].isReg()) 2001 return; 2002 2003 unsigned CondReg = Cond[0].getReg(); 2004 for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) { 2005 (*UI).setIsKill(false); 2006 } 2007 } 2008 2009 void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, 2010 MachineBasicBlock *MergeBB, 2011 unsigned BBSelectReg) { 2012 MachineBasicBlock *TrueBB = nullptr; 2013 MachineBasicBlock *FalseBB = nullptr; 2014 SmallVector<MachineOperand, 1> Cond; 2015 MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB]; 2016 TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond); 2017 2018 const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator()); 2019 2020 if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) { 2021 // This is an exit block, hence no successors. We will assign the 2022 // bb select register to the entry block. 2023 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 2024 BBSelectReg, 2025 CodeBB->getParent()->begin()->getNumber()); 2026 insertUnconditionalBranch(CodeBB, MergeBB, DL); 2027 return; 2028 } 2029 2030 if (FalseBB == nullptr && TrueBB == nullptr) { 2031 TrueBB = FallthroughBB; 2032 } else if (TrueBB != nullptr) { 2033 FalseBB = 2034 (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB; 2035 } 2036 2037 if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) { 2038 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 2039 BBSelectReg, TrueBB->getNumber()); 2040 } else { 2041 const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg); 2042 unsigned TrueBBReg = MRI->createVirtualRegister(RegClass); 2043 unsigned FalseBBReg = MRI->createVirtualRegister(RegClass); 2044 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 2045 TrueBBReg, TrueBB->getNumber()); 2046 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 2047 FalseBBReg, FalseBB->getNumber()); 2048 ensureCondIsNotKilled(Cond); 2049 TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL, 2050 BBSelectReg, Cond, TrueBBReg, FalseBBReg); 2051 } 2052 2053 insertUnconditionalBranch(CodeBB, MergeBB, DL); 2054 } 2055 2056 MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) { 2057 if (MRI->def_begin(Reg) == MRI->def_end()) { 2058 DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) 2059 << " has NO defs\n"); 2060 } else if (!MRI->hasOneDef(Reg)) { 2061 DEBUG(dbgs() << "Register " << PrintReg(Reg, MRI->getTargetRegisterInfo()) 2062 << " has multiple defs\n"); 2063 DEBUG(dbgs() << "DEFS BEGIN:\n"); 2064 for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { 2065 DEBUG(DI->getParent()->dump()); 2066 } 2067 DEBUG(dbgs() << "DEFS END\n"); 2068 } 2069 2070 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); 2071 return (*(MRI->def_begin(Reg))).getParent(); 2072 } 2073 2074 void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, 2075 MachineBasicBlock *CodeBB, 2076 MachineBasicBlock *MergeBB, 2077 LinearizedRegion *InnerRegion, 2078 unsigned DestReg, 2079 unsigned SourceReg) { 2080 // In this function we know we are part of a chain already, so we need 2081 // to add the registers to the existing chain, and rename the register 2082 // inside the region. 2083 bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); 2084 MachineInstr *DefInstr = getDefInstr(SourceReg); 2085 if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) { 2086 // Handle the case where the def is a PHI-def inside a basic 2087 // block, then we only need to do renaming. Special care needs to 2088 // be taken if the PHI-def is part of an existing chain, or if a 2089 // new one needs to be created. 2090 InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI); 2091 2092 // We collect all PHI Information, and if we are at the region entry, 2093 // all PHIs will be removed, and then re-introduced if needed. 2094 storePHILinearizationInfoDest(DestReg, *DefInstr); 2095 // We have picked up all the information we need now and can remove 2096 // the PHI 2097 PHIInfo.removeSource(DestReg, SourceReg, CodeBB); 2098 DefInstr->eraseFromParent(); 2099 } else { 2100 // If this is not a phi-def, or it is a phi-def but from a linearized region 2101 if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) { 2102 // If this is a single BB and the definition is in this block we 2103 // need to replace any uses outside the region. 2104 InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI); 2105 } 2106 const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg); 2107 unsigned NextDestReg = MRI->createVirtualRegister(RegClass); 2108 bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1; 2109 DEBUG(dbgs() << "Insert Chained PHI\n"); 2110 insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg, 2111 SourceReg, IsLastDef); 2112 2113 PHIInfo.removeSource(DestReg, SourceReg, CodeBB); 2114 if (IsLastDef) { 2115 const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator()); 2116 TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL, 2117 NextDestReg, 0); 2118 PHIInfo.deleteDef(DestReg); 2119 } else { 2120 PHIInfo.replaceDef(DestReg, NextDestReg); 2121 } 2122 } 2123 } 2124 2125 bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB, 2126 LinearizedRegion *InnerRegion, 2127 unsigned Register) { 2128 return getDefInstr(Register)->getParent() == MBB || 2129 InnerRegion->contains(getDefInstr(Register)->getParent()); 2130 } 2131 2132 void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, 2133 MachineBasicBlock *CodeBB, 2134 MachineBasicBlock *MergeBB, 2135 LinearizedRegion *InnerRegion, 2136 LinearizedRegion *LRegion) { 2137 DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts(); 2138 SmallVector<unsigned, 4> OldLiveOuts; 2139 bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); 2140 for (auto OLI : *LiveOuts) { 2141 OldLiveOuts.push_back(OLI); 2142 } 2143 2144 for (auto LI : OldLiveOuts) { 2145 DEBUG(dbgs() << "LiveOut: " << PrintReg(LI, TRI)); 2146 if (!containsDef(CodeBB, InnerRegion, LI) || 2147 (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) { 2148 // If the register simly lives through the CodeBB, we don't have 2149 // to rewrite anything since the register is not defined in this 2150 // part of the code. 2151 DEBUG(dbgs() << "- through"); 2152 continue; 2153 } 2154 DEBUG(dbgs() << "\n"); 2155 unsigned Reg = LI; 2156 if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) { 2157 // If the register is live out, we do want to create a phi, 2158 // unless it is from the Exit block, becasuse in that case there 2159 // is already a PHI, and no need to create a new one. 2160 2161 // If the register is just a live out def and not part of a phi 2162 // chain, we need to create a PHI node to handle the if region, 2163 // and replace all uses outside of the region with the new dest 2164 // register, unless it is the outgoing BB select register. We have 2165 // already creaed phi nodes for these. 2166 const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); 2167 unsigned PHIDestReg = MRI->createVirtualRegister(RegClass); 2168 unsigned IfSourceReg = MRI->createVirtualRegister(RegClass); 2169 // Create initializer, this value is never used, but is needed 2170 // to satisfy SSA. 2171 DEBUG(dbgs() << "Initializer for reg: " << PrintReg(Reg) << "\n"); 2172 TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(), 2173 IfSourceReg, 0); 2174 2175 InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI); 2176 DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n"); 2177 insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg, 2178 IfSourceReg, Reg, true); 2179 } 2180 } 2181 2182 // Handle the chained definitions in PHIInfo, checking if this basic block 2183 // is a source block for a definition. 2184 SmallVector<unsigned, 4> Sources; 2185 if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) { 2186 DEBUG(dbgs() << "Inserting PHI Live Out from BB#" << CodeBB->getNumber() 2187 << "\n"); 2188 for (auto SI : Sources) { 2189 unsigned DestReg; 2190 PHIInfo.findDest(SI, CodeBB, DestReg); 2191 insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI); 2192 } 2193 DEBUG(dbgs() << "Insertion done.\n"); 2194 } 2195 2196 PHIInfo.dump(MRI); 2197 } 2198 2199 void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) { 2200 DEBUG(dbgs() << "Before PHI Prune\n"); 2201 DEBUG(PHIInfo.dump(MRI)); 2202 SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4> 2203 ElimiatedSources; 2204 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2205 ++DRI) { 2206 2207 unsigned DestReg = *DRI; 2208 auto SE = PHIInfo.sources_end(DestReg); 2209 2210 bool MBBContainsPHISource = false; 2211 // Check if there is a PHI source in this MBB 2212 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2213 unsigned SourceReg = (*SRI).first; 2214 MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); 2215 if (Def->getParent()->getParent() == MBB) { 2216 MBBContainsPHISource = true; 2217 } 2218 } 2219 2220 // If so, all other sources are useless since we know this block 2221 // is always executed when the region is executed. 2222 if (MBBContainsPHISource) { 2223 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2224 PHILinearize::PHISourceT Source = *SRI; 2225 unsigned SourceReg = Source.first; 2226 MachineBasicBlock *SourceMBB = Source.second; 2227 MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); 2228 if (Def->getParent()->getParent() != MBB) { 2229 ElimiatedSources.push_back( 2230 std::make_tuple(DestReg, SourceReg, SourceMBB)); 2231 } 2232 } 2233 } 2234 } 2235 2236 // Remove the PHI sources that are in the given MBB 2237 for (auto &SourceInfo : ElimiatedSources) { 2238 PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo), 2239 std::get<2>(SourceInfo)); 2240 } 2241 DEBUG(dbgs() << "After PHI Prune\n"); 2242 DEBUG(PHIInfo.dump(MRI)); 2243 } 2244 2245 void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion, 2246 unsigned DestReg) { 2247 MachineBasicBlock *Entry = CurrentRegion->getEntry(); 2248 MachineBasicBlock *Exit = CurrentRegion->getExit(); 2249 MachineBasicBlock *Pred = *(Entry->pred_begin()); 2250 2251 DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() 2252 << " Pred: " << Pred->getNumber() << "\n"); 2253 2254 int NumSources = 0; 2255 auto SE = PHIInfo.sources_end(DestReg); 2256 2257 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2258 NumSources++; 2259 } 2260 2261 if (NumSources == 1) { 2262 auto SRI = PHIInfo.sources_begin(DestReg); 2263 unsigned SourceReg = (*SRI).first; 2264 replaceRegisterWith(DestReg, SourceReg); 2265 } else { 2266 const DebugLoc &DL = Entry->findDebugLoc(Entry->begin()); 2267 MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL, 2268 TII->get(TargetOpcode::PHI), DestReg); 2269 DEBUG(dbgs() << "Entry PHI " << PrintReg(DestReg, TRI) << "<def> = PHI("); 2270 2271 unsigned CurrentBackedgeReg = 0; 2272 2273 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2274 unsigned SourceReg = (*SRI).first; 2275 2276 if (CurrentRegion->contains((*SRI).second)) { 2277 if (CurrentBackedgeReg == 0) { 2278 CurrentBackedgeReg = SourceReg; 2279 } else { 2280 MachineInstr *PHIDefInstr = getDefInstr(SourceReg); 2281 MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent(); 2282 const TargetRegisterClass *RegClass = 2283 MRI->getRegClass(CurrentBackedgeReg); 2284 unsigned NewBackedgeReg = MRI->createVirtualRegister(RegClass); 2285 MachineInstrBuilder BackedgePHI = 2286 BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL, 2287 TII->get(TargetOpcode::PHI), NewBackedgeReg); 2288 BackedgePHI.addReg(CurrentBackedgeReg); 2289 BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0)); 2290 BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1)); 2291 BackedgePHI.addMBB((*SRI).second); 2292 CurrentBackedgeReg = NewBackedgeReg; 2293 DEBUG(dbgs() << "Inserting backedge PHI: " 2294 << PrintReg(NewBackedgeReg, TRI) << "<def> = PHI(" 2295 << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" 2296 << getPHIPred(*PHIDefInstr, 0)->getNumber() << ", " 2297 << PrintReg(getPHISourceReg(*PHIDefInstr, 1), TRI) 2298 << ", BB#" << (*SRI).second->getNumber()); 2299 } 2300 } else { 2301 MIB.addReg(SourceReg); 2302 MIB.addMBB((*SRI).second); 2303 DEBUG(dbgs() << PrintReg(SourceReg, TRI) << ", BB#" 2304 << (*SRI).second->getNumber() << ", "); 2305 } 2306 } 2307 2308 // Add the final backedge register source to the entry phi 2309 if (CurrentBackedgeReg != 0) { 2310 MIB.addReg(CurrentBackedgeReg); 2311 MIB.addMBB(Exit); 2312 DEBUG(dbgs() << PrintReg(CurrentBackedgeReg, TRI) << ", BB#" 2313 << Exit->getNumber() << ")\n"); 2314 } else { 2315 DEBUG(dbgs() << ")\n"); 2316 } 2317 } 2318 } 2319 2320 void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) { 2321 DEBUG(PHIInfo.dump(MRI)); 2322 2323 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2324 ++DRI) { 2325 2326 unsigned DestReg = *DRI; 2327 createEntryPHI(CurrentRegion, DestReg); 2328 } 2329 PHIInfo.clear(); 2330 } 2331 2332 void AMDGPUMachineCFGStructurizer::replaceRegisterWith(unsigned Register, 2333 unsigned NewRegister) { 2334 assert(Register != NewRegister && "Cannot replace a reg with itself"); 2335 2336 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), 2337 E = MRI->reg_end(); 2338 I != E;) { 2339 MachineOperand &O = *I; 2340 ++I; 2341 if (TargetRegisterInfo::isPhysicalRegister(NewRegister)) { 2342 DEBUG(dbgs() << "Trying to substitute physical register: " 2343 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 2344 << "\n"); 2345 llvm_unreachable("Cannot substitute physical registers"); 2346 // We don't handle physical registers, but if we need to 2347 // in the future This is how we do it: 2348 // O.substPhysReg(NewRegister, *TRI); 2349 } else { 2350 DEBUG(dbgs() << "Replacing register: " 2351 << PrintReg(Register, MRI->getTargetRegisterInfo()) 2352 << " with " 2353 << PrintReg(NewRegister, MRI->getTargetRegisterInfo()) 2354 << "\n"); 2355 O.setReg(NewRegister); 2356 } 2357 } 2358 PHIInfo.deleteDef(Register); 2359 2360 getRegionMRT()->replaceLiveOutReg(Register, NewRegister); 2361 2362 DEBUG(PHIInfo.dump(MRI)); 2363 } 2364 2365 void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) { 2366 DEBUG(dbgs() << "Resolve PHI Infos\n"); 2367 DEBUG(PHIInfo.dump(MRI)); 2368 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2369 ++DRI) { 2370 unsigned DestReg = *DRI; 2371 DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) << "\n"); 2372 auto SRI = PHIInfo.sources_begin(DestReg); 2373 unsigned SourceReg = (*SRI).first; 2374 DEBUG(dbgs() << "DestReg: " << PrintReg(DestReg, TRI) 2375 << " SourceReg: " << PrintReg(SourceReg, TRI) << "\n"); 2376 2377 assert(PHIInfo.sources_end(DestReg) == ++SRI && 2378 "More than one phi source in entry node"); 2379 replaceRegisterWith(DestReg, SourceReg); 2380 } 2381 } 2382 2383 static bool isFunctionEntryBlock(MachineBasicBlock *MBB) { 2384 return ((&(*(MBB->getParent()->begin()))) == MBB); 2385 } 2386 2387 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( 2388 MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB, 2389 LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn, 2390 unsigned BBSelectRegOut) { 2391 if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) { 2392 // Handle non-loop function entry block. 2393 // We need to allow loops to the entry block and then 2394 rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); 2395 resolvePHIInfos(CodeBB); 2396 removeExternalCFGSuccessors(CodeBB); 2397 CodeBB->addSuccessor(MergeBB); 2398 CurrentRegion->addMBB(CodeBB); 2399 return nullptr; 2400 } 2401 if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) { 2402 // Handle non-loop region entry block. 2403 MachineFunction *MF = MergeBB->getParent(); 2404 auto MergeIter = MergeBB->getIterator(); 2405 auto CodeBBStartIter = CodeBB->getIterator(); 2406 auto CodeBBEndIter = ++(CodeBB->getIterator()); 2407 if (CodeBBEndIter != MergeIter) { 2408 MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter); 2409 } 2410 rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); 2411 prunePHIInfo(CodeBB); 2412 createEntryPHIs(CurrentRegion); 2413 removeExternalCFGSuccessors(CodeBB); 2414 CodeBB->addSuccessor(MergeBB); 2415 CurrentRegion->addMBB(CodeBB); 2416 return nullptr; 2417 } else { 2418 // Handle internal block. 2419 const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn); 2420 unsigned CodeBBSelectReg = MRI->createVirtualRegister(RegClass); 2421 rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg); 2422 bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB; 2423 MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB, 2424 BBSelectRegIn, IsRegionEntryBB); 2425 CurrentRegion->addMBB(IfBB); 2426 // If this is the entry block we need to make the If block the new 2427 // linearized region entry. 2428 if (IsRegionEntryBB) { 2429 CurrentRegion->setEntry(IfBB); 2430 2431 if (CurrentRegion->getHasLoop()) { 2432 MachineBasicBlock *RegionExit = CurrentRegion->getExit(); 2433 MachineBasicBlock *ETrueBB = nullptr; 2434 MachineBasicBlock *EFalseBB = nullptr; 2435 SmallVector<MachineOperand, 1> ECond; 2436 2437 const DebugLoc &DL = DebugLoc(); 2438 TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); 2439 TII->removeBranch(*RegionExit); 2440 2441 // We need to create a backedge if there is a loop 2442 unsigned Reg = TII->insertNE( 2443 RegionExit, RegionExit->instr_end(), DL, 2444 CurrentRegion->getRegionMRT()->getInnerOutputRegister(), 2445 CurrentRegion->getRegionMRT()->getEntry()->getNumber()); 2446 MachineOperand RegOp = 2447 MachineOperand::CreateReg(Reg, false, false, true); 2448 ArrayRef<MachineOperand> Cond(RegOp); 2449 DEBUG(dbgs() << "RegionExitReg: "); 2450 DEBUG(Cond[0].print(dbgs(), TRI)); 2451 DEBUG(dbgs() << "\n"); 2452 TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, 2453 Cond, DebugLoc()); 2454 RegionExit->addSuccessor(CurrentRegion->getEntry()); 2455 } 2456 } 2457 CurrentRegion->addMBB(CodeBB); 2458 LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo); 2459 2460 InnerRegion.setParent(CurrentRegion); 2461 DEBUG(dbgs() << "Insert BB Select PHI (BB)\n"); 2462 insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn, 2463 CodeBBSelectReg); 2464 InnerRegion.addMBB(MergeBB); 2465 2466 DEBUG(InnerRegion.print(dbgs(), TRI)); 2467 rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion); 2468 extractKilledPHIs(CodeBB); 2469 if (IsRegionEntryBB) { 2470 createEntryPHIs(CurrentRegion); 2471 } 2472 return IfBB; 2473 } 2474 } 2475 2476 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( 2477 MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion, 2478 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, 2479 unsigned BBSelectRegIn, unsigned BBSelectRegOut) { 2480 unsigned CodeBBSelectReg = 2481 InnerRegion->getRegionMRT()->getInnerOutputRegister(); 2482 MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry(); 2483 MachineBasicBlock *CodeExitBB = InnerRegion->getExit(); 2484 MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB, 2485 SelectBB, BBSelectRegIn, true); 2486 CurrentRegion->addMBB(IfBB); 2487 bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry(); 2488 if (isEntry) { 2489 2490 if (CurrentRegion->getHasLoop()) { 2491 MachineBasicBlock *RegionExit = CurrentRegion->getExit(); 2492 MachineBasicBlock *ETrueBB = nullptr; 2493 MachineBasicBlock *EFalseBB = nullptr; 2494 SmallVector<MachineOperand, 1> ECond; 2495 2496 const DebugLoc &DL = DebugLoc(); 2497 TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); 2498 TII->removeBranch(*RegionExit); 2499 2500 // We need to create a backedge if there is a loop 2501 unsigned Reg = 2502 TII->insertNE(RegionExit, RegionExit->instr_end(), DL, 2503 CurrentRegion->getRegionMRT()->getInnerOutputRegister(), 2504 CurrentRegion->getRegionMRT()->getEntry()->getNumber()); 2505 MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); 2506 ArrayRef<MachineOperand> Cond(RegOp); 2507 DEBUG(dbgs() << "RegionExitReg: "); 2508 DEBUG(Cond[0].print(dbgs(), TRI)); 2509 DEBUG(dbgs() << "\n"); 2510 TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, 2511 Cond, DebugLoc()); 2512 RegionExit->addSuccessor(IfBB); 2513 } 2514 } 2515 CurrentRegion->addMBBs(InnerRegion); 2516 DEBUG(dbgs() << "Insert BB Select PHI (region)\n"); 2517 insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn, 2518 CodeBBSelectReg); 2519 2520 rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion, 2521 CurrentRegion); 2522 2523 rewriteRegionEntryPHIs(InnerRegion, IfBB); 2524 2525 if (isEntry) { 2526 CurrentRegion->setEntry(IfBB); 2527 } 2528 2529 if (isEntry) { 2530 createEntryPHIs(CurrentRegion); 2531 } 2532 2533 return IfBB; 2534 } 2535 2536 void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, 2537 MachineBasicBlock *Entry, 2538 MachineBasicBlock *EntrySucc, 2539 LinearizedRegion *LRegion) { 2540 SmallVector<unsigned, 2> PHIRegionIndices; 2541 getPHIRegionIndices(LRegion, PHI, PHIRegionIndices); 2542 2543 assert(PHIRegionIndices.size() == 1); 2544 2545 unsigned RegionIndex = PHIRegionIndices[0]; 2546 unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex); 2547 MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex); 2548 unsigned PHIDest = getPHIDestReg(PHI); 2549 unsigned PHISource = PHIDest; 2550 unsigned ReplaceReg; 2551 2552 if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) { 2553 PHISource = ReplaceReg; 2554 } 2555 2556 const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest); 2557 unsigned NewDestReg = MRI->createVirtualRegister(RegClass); 2558 LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI); 2559 MachineInstrBuilder MIB = 2560 BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), 2561 TII->get(TargetOpcode::PHI), NewDestReg); 2562 DEBUG(dbgs() << "Split Entry PHI " << PrintReg(NewDestReg, TRI) 2563 << "<def> = PHI("); 2564 MIB.addReg(PHISource); 2565 MIB.addMBB(Entry); 2566 DEBUG(dbgs() << PrintReg(PHISource, TRI) << ", BB#" << Entry->getNumber()); 2567 MIB.addReg(RegionSourceReg); 2568 MIB.addMBB(RegionSourceMBB); 2569 DEBUG(dbgs() << " ," << PrintReg(RegionSourceReg, TRI) << ", BB#" 2570 << RegionSourceMBB->getNumber() << ")\n"); 2571 } 2572 2573 void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry, 2574 MachineBasicBlock *EntrySucc, 2575 LinearizedRegion *LRegion) { 2576 SmallVector<MachineInstr *, 2> PHIs; 2577 collectPHIs(Entry, PHIs); 2578 2579 for (auto PHII : PHIs) { 2580 splitLoopPHI(*PHII, Entry, EntrySucc, LRegion); 2581 } 2582 } 2583 2584 // Split the exit block so that we can insert a end control flow 2585 MachineBasicBlock * 2586 AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) { 2587 auto MRTRegion = LRegion->getRegionMRT(); 2588 auto Exit = LRegion->getExit(); 2589 auto MF = Exit->getParent(); 2590 auto Succ = MRTRegion->getSucc(); 2591 2592 auto NewExit = MF->CreateMachineBasicBlock(); 2593 auto AfterExitIter = Exit->getIterator(); 2594 AfterExitIter++; 2595 MF->insert(AfterExitIter, NewExit); 2596 Exit->removeSuccessor(Succ); 2597 Exit->addSuccessor(NewExit); 2598 NewExit->addSuccessor(Succ); 2599 insertUnconditionalBranch(NewExit, Succ); 2600 LRegion->addMBB(NewExit); 2601 LRegion->setExit(NewExit); 2602 2603 DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() << "\n"); 2604 2605 // Replace any PHI Predecessors in the successor with NewExit 2606 for (auto &II : *Succ) { 2607 MachineInstr &Instr = II; 2608 2609 // If we are past the PHI instructions we are done 2610 if (!Instr.isPHI()) 2611 break; 2612 2613 int numPreds = getPHINumInputs(Instr); 2614 for (int i = 0; i < numPreds; ++i) { 2615 auto Pred = getPHIPred(Instr, i); 2616 if (Pred == Exit) { 2617 setPhiPred(Instr, i, NewExit); 2618 } 2619 } 2620 } 2621 2622 return NewExit; 2623 } 2624 2625 2626 static MachineBasicBlock *split(MachineBasicBlock::iterator I) { 2627 // Create the fall-through block. 2628 MachineBasicBlock *MBB = (*I).getParent(); 2629 MachineFunction *MF = MBB->getParent(); 2630 MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock(); 2631 auto MBBIter = ++(MBB->getIterator()); 2632 MF->insert(MBBIter, SuccMBB); 2633 SuccMBB->transferSuccessorsAndUpdatePHIs(MBB); 2634 MBB->addSuccessor(SuccMBB); 2635 2636 // Splice the code over. 2637 SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end()); 2638 2639 return SuccMBB; 2640 } 2641 2642 // Split the entry block separating PHI-nodes and the rest of the code 2643 // This is needed to insert an initializer for the bb select register 2644 // inloop regions. 2645 2646 MachineBasicBlock * 2647 AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) { 2648 MachineBasicBlock *Entry = LRegion->getEntry(); 2649 MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI()); 2650 MachineBasicBlock *Exit = LRegion->getExit(); 2651 2652 DEBUG(dbgs() << "Split BB#" << Entry->getNumber() << " to BB#" 2653 << Entry->getNumber() << " -> BB#" << EntrySucc->getNumber() 2654 << "\n"); 2655 LRegion->addMBB(EntrySucc); 2656 2657 // Make the backedge go to Entry Succ 2658 if (Exit->isSuccessor(Entry)) { 2659 Exit->removeSuccessor(Entry); 2660 } 2661 Exit->addSuccessor(EntrySucc); 2662 MachineInstr &Branch = *(Exit->instr_rbegin()); 2663 for (auto &UI : Branch.uses()) { 2664 if (UI.isMBB() && UI.getMBB() == Entry) { 2665 UI.setMBB(EntrySucc); 2666 } 2667 } 2668 2669 splitLoopPHIs(Entry, EntrySucc, LRegion); 2670 2671 return EntrySucc; 2672 } 2673 2674 LinearizedRegion * 2675 AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) { 2676 LinearizedRegion *LRegion = Region->getLinearizedRegion(); 2677 LRegion->initLiveOut(Region, MRI, TRI, PHIInfo); 2678 LRegion->setEntry(Region->getEntry()); 2679 return LRegion; 2680 } 2681 2682 static void removeOldExitPreds(RegionMRT *Region) { 2683 MachineBasicBlock *Exit = Region->getSucc(); 2684 if (Exit == nullptr) { 2685 return; 2686 } 2687 for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(), 2688 E = Exit->pred_end(); 2689 PI != E; ++PI) { 2690 if (Region->contains(*PI)) { 2691 (*PI)->removeSuccessor(Exit); 2692 } 2693 } 2694 } 2695 2696 static bool mbbHasBackEdge(MachineBasicBlock *MBB, 2697 SmallPtrSet<MachineBasicBlock *, 8> &MBBs) { 2698 for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { 2699 if (MBBs.count(*SI) != 0) { 2700 return true; 2701 } 2702 } 2703 return false; 2704 } 2705 2706 static bool containsNewBackedge(MRT *Tree, 2707 SmallPtrSet<MachineBasicBlock *, 8> &MBBs) { 2708 // Need to traverse this in reverse since it is in post order. 2709 if (Tree == nullptr) 2710 return false; 2711 2712 if (Tree->isMBB()) { 2713 MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB(); 2714 MBBs.insert(MBB); 2715 if (mbbHasBackEdge(MBB, MBBs)) { 2716 return true; 2717 } 2718 } else { 2719 RegionMRT *Region = Tree->getRegionMRT(); 2720 SetVector<MRT *> *Children = Region->getChildren(); 2721 for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) { 2722 if (containsNewBackedge(*CI, MBBs)) 2723 return true; 2724 } 2725 } 2726 return false; 2727 } 2728 2729 static bool containsNewBackedge(RegionMRT *Region) { 2730 SmallPtrSet<MachineBasicBlock *, 8> MBBs; 2731 return containsNewBackedge(Region, MBBs); 2732 } 2733 2734 bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { 2735 auto *LRegion = initLinearizedRegion(Region); 2736 LRegion->setHasLoop(containsNewBackedge(Region)); 2737 MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region); 2738 MachineBasicBlock *CurrentMerge = LastMerge; 2739 LRegion->addMBB(LastMerge); 2740 LRegion->setExit(LastMerge); 2741 2742 rewriteRegionExitPHIs(Region, LastMerge, LRegion); 2743 removeOldExitPreds(Region); 2744 2745 DEBUG(PHIInfo.dump(MRI)); 2746 2747 auto Entry = Region->getEntry(); 2748 2749 SetVector<MRT *> *Children = Region->getChildren(); 2750 DEBUG(dbgs() << "===========If Region Start===============\n"); 2751 if (LRegion->getHasLoop()) { 2752 DEBUG(dbgs() << "Has Backedge: Yes\n"); 2753 } else { 2754 DEBUG(dbgs() << "Has Backedge: No\n"); 2755 } 2756 2757 unsigned BBSelectRegIn; 2758 unsigned BBSelectRegOut; 2759 for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) { 2760 DEBUG(dbgs() << "CurrentRegion: \n"); 2761 DEBUG(LRegion->print(dbgs(), TRI)); 2762 2763 auto CNI = CI; 2764 ++CNI; 2765 2766 MRT *Child = (*CI); 2767 2768 if (Child->isRegion()) { 2769 2770 LinearizedRegion *InnerLRegion = 2771 Child->getRegionMRT()->getLinearizedRegion(); 2772 // We found the block is the exit of an inner region, we need 2773 // to put it in the current linearized region. 2774 2775 DEBUG(dbgs() << "Linearizing region: "); 2776 DEBUG(InnerLRegion->print(dbgs(), TRI)); 2777 DEBUG(dbgs() << "\n"); 2778 2779 MachineBasicBlock *InnerEntry = InnerLRegion->getEntry(); 2780 if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) { 2781 // Entry has already been linearized, no need to do this region. 2782 unsigned OuterSelect = InnerLRegion->getBBSelectRegOut(); 2783 unsigned InnerSelectReg = 2784 InnerLRegion->getRegionMRT()->getInnerOutputRegister(); 2785 replaceRegisterWith(InnerSelectReg, OuterSelect), 2786 resolvePHIInfos(InnerEntry); 2787 if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge)) 2788 InnerLRegion->getExit()->addSuccessor(CurrentMerge); 2789 continue; 2790 } 2791 2792 BBSelectRegOut = Child->getBBSelectRegOut(); 2793 BBSelectRegIn = Child->getBBSelectRegIn(); 2794 2795 DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) 2796 << "\n"); 2797 DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) 2798 << "\n"); 2799 2800 MachineBasicBlock *IfEnd = CurrentMerge; 2801 CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion, 2802 Child->getRegionMRT()->getEntry(), 2803 BBSelectRegIn, BBSelectRegOut); 2804 TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); 2805 } else { 2806 MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB(); 2807 DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n"); 2808 2809 if (MBB == getSingleExitNode(*(MBB->getParent()))) { 2810 // If this is the exit block then we need to skip to the next. 2811 // The "in" register will be transferred to "out" in the next 2812 // iteration. 2813 continue; 2814 } 2815 2816 BBSelectRegOut = Child->getBBSelectRegOut(); 2817 BBSelectRegIn = Child->getBBSelectRegIn(); 2818 2819 DEBUG(dbgs() << "BBSelectRegIn: " << PrintReg(BBSelectRegIn, TRI) 2820 << "\n"); 2821 DEBUG(dbgs() << "BBSelectRegOut: " << PrintReg(BBSelectRegOut, TRI) 2822 << "\n"); 2823 2824 MachineBasicBlock *IfEnd = CurrentMerge; 2825 // This is a basic block that is not part of an inner region, we 2826 // need to put it in the current linearized region. 2827 CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn, 2828 BBSelectRegOut); 2829 if (CurrentMerge) { 2830 TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); 2831 } 2832 2833 DEBUG(PHIInfo.dump(MRI)); 2834 } 2835 } 2836 2837 LRegion->removeFalseRegisterKills(MRI); 2838 2839 if (LRegion->getHasLoop()) { 2840 MachineBasicBlock *NewSucc = splitEntry(LRegion); 2841 if (isFunctionEntryBlock(LRegion->getEntry())) { 2842 resolvePHIInfos(LRegion->getEntry()); 2843 } 2844 const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI()); 2845 unsigned InReg = LRegion->getBBSelectRegIn(); 2846 unsigned InnerSelectReg = 2847 MRI->createVirtualRegister(MRI->getRegClass(InReg)); 2848 unsigned NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); 2849 TII->materializeImmediate(*(LRegion->getEntry()), 2850 LRegion->getEntry()->getFirstTerminator(), DL, 2851 NewInReg, Region->getEntry()->getNumber()); 2852 // Need to be careful about updating the registers inside the region. 2853 LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI); 2854 DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n"); 2855 insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc, 2856 InnerSelectReg, NewInReg, 2857 LRegion->getRegionMRT()->getInnerOutputRegister()); 2858 splitExit(LRegion); 2859 TII->convertNonUniformLoopRegion(NewSucc, LastMerge); 2860 } 2861 2862 if (Region->isRoot()) { 2863 TII->insertReturn(*LastMerge); 2864 } 2865 2866 DEBUG(Entry->getParent()->dump()); 2867 DEBUG(LRegion->print(dbgs(), TRI)); 2868 DEBUG(PHIInfo.dump(MRI)); 2869 2870 DEBUG(dbgs() << "===========If Region End===============\n"); 2871 2872 Region->setLinearizedRegion(LRegion); 2873 return true; 2874 } 2875 2876 bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) { 2877 if (false && regionIsSimpleIf(Region)) { 2878 transformSimpleIfRegion(Region); 2879 return true; 2880 } else if (regionIsSequence(Region)) { 2881 fixupRegionExits(Region); 2882 return false; 2883 } else { 2884 structurizeComplexRegion(Region); 2885 } 2886 return false; 2887 } 2888 2889 static int structurize_once = 0; 2890 2891 bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region, 2892 bool isTopRegion) { 2893 bool Changed = false; 2894 2895 auto Children = Region->getChildren(); 2896 for (auto CI : *Children) { 2897 if (CI->isRegion()) { 2898 Changed |= structurizeRegions(CI->getRegionMRT(), false); 2899 } 2900 } 2901 2902 if (structurize_once < 2 || true) { 2903 Changed |= structurizeRegion(Region); 2904 structurize_once++; 2905 } 2906 return Changed; 2907 } 2908 2909 void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) { 2910 DEBUG(dbgs() << "Fallthrough Map:\n"); 2911 for (auto &MBBI : MF) { 2912 MachineBasicBlock *MBB = MBBI.getFallThrough(); 2913 if (MBB != nullptr) { 2914 DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> " 2915 << MBB->getNumber() << "\n"); 2916 } 2917 FallthroughMap[&MBBI] = MBB; 2918 } 2919 } 2920 2921 void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region, 2922 unsigned SelectOut) { 2923 LinearizedRegion *LRegion = new LinearizedRegion(); 2924 if (SelectOut) { 2925 LRegion->addLiveOut(SelectOut); 2926 DEBUG(dbgs() << "Add LiveOut (BBSelect): " << PrintReg(SelectOut, TRI) 2927 << "\n"); 2928 } 2929 LRegion->setRegionMRT(Region); 2930 Region->setLinearizedRegion(LRegion); 2931 LRegion->setParent(Region->getParent() 2932 ? Region->getParent()->getLinearizedRegion() 2933 : nullptr); 2934 } 2935 2936 unsigned 2937 AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut, 2938 MachineRegisterInfo *MRI, 2939 const SIInstrInfo *TII) { 2940 if (MRT->isRegion()) { 2941 RegionMRT *Region = MRT->getRegionMRT(); 2942 Region->setBBSelectRegOut(SelectOut); 2943 unsigned InnerSelectOut = createBBSelectReg(TII, MRI); 2944 2945 // Fixme: Move linearization creation to the original spot 2946 createLinearizedRegion(Region, SelectOut); 2947 2948 for (auto CI = Region->getChildren()->begin(), 2949 CE = Region->getChildren()->end(); 2950 CI != CE; ++CI) { 2951 InnerSelectOut = 2952 initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII); 2953 } 2954 MRT->setBBSelectRegIn(InnerSelectOut); 2955 return InnerSelectOut; 2956 } else { 2957 MRT->setBBSelectRegOut(SelectOut); 2958 unsigned NewSelectIn = createBBSelectReg(TII, MRI); 2959 MRT->setBBSelectRegIn(NewSelectIn); 2960 return NewSelectIn; 2961 } 2962 } 2963 2964 static void checkRegOnlyPHIInputs(MachineFunction &MF) { 2965 for (auto &MBBI : MF) { 2966 for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(), 2967 E = MBBI.instr_end(); 2968 I != E; ++I) { 2969 MachineInstr &Instr = *I; 2970 if (Instr.isPHI()) { 2971 int numPreds = getPHINumInputs(Instr); 2972 for (int i = 0; i < numPreds; ++i) { 2973 assert(Instr.getOperand(i * 2 + 1).isReg() && 2974 "PHI Operand not a register"); 2975 } 2976 } 2977 } 2978 } 2979 } 2980 2981 2982 INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", 2983 "AMDGPU Machine CFG Structurizer", false, false) 2984 INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass) 2985 INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", 2986 "AMDGPU Machine CFG Structurizer", false, false) 2987 2988 char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID; 2989 2990 2991 bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { 2992 const SISubtarget &ST = MF.getSubtarget<SISubtarget>(); 2993 const SIInstrInfo *TII = ST.getInstrInfo(); 2994 TRI = ST.getRegisterInfo(); 2995 MRI = &(MF.getRegInfo()); 2996 initFallthroughMap(MF); 2997 2998 checkRegOnlyPHIInputs(MF); 2999 DEBUG(dbgs() << "----STRUCTURIZER START----\n"); 3000 DEBUG(MF.dump()); 3001 3002 Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo()); 3003 DEBUG(Regions->dump()); 3004 3005 RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI); 3006 setRegionMRT(RTree); 3007 initializeSelectRegisters(RTree, 0, MRI, TII); 3008 DEBUG(RTree->dump(TRI)); 3009 bool result = structurizeRegions(RTree, true); 3010 delete RTree; 3011 DEBUG(dbgs() << "----STRUCTURIZER END----\n"); 3012 initFallthroughMap(MF); 3013 return result; 3014 } 3015 3016 FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() { 3017 return new AMDGPUMachineCFGStructurizer(); 3018 } 3019 3020