1 //===- AMDGPUMachineCFGStructurizer.cpp - Machine code if conversion pass. ===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file implements the machine instruction level CFG structurizer pass. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AMDGPU.h" 14 #include "AMDGPUSubtarget.h" 15 #include "SIInstrInfo.h" 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/PostOrderIterator.h" 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/SmallPtrSet.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/CodeGen/MachineBasicBlock.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineInstr.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineOperand.h" 29 #include "llvm/CodeGen/MachineRegionInfo.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 #include "llvm/CodeGen/TargetOpcodes.h" 32 #include "llvm/CodeGen/TargetRegisterInfo.h" 33 #include "llvm/Config/llvm-config.h" 34 #include "llvm/IR/DebugLoc.h" 35 #include "llvm/InitializePasses.h" 36 #include "llvm/Pass.h" 37 #include "llvm/Support/Compiler.h" 38 #include "llvm/Support/Debug.h" 39 #include "llvm/Support/ErrorHandling.h" 40 #include "llvm/Support/raw_ostream.h" 41 #include <cassert> 42 #include <tuple> 43 #include <utility> 44 45 using namespace llvm; 46 47 #define DEBUG_TYPE "amdgpucfgstructurizer" 48 49 namespace { 50 51 class PHILinearizeDestIterator; 52 53 class PHILinearize { 54 friend class PHILinearizeDestIterator; 55 56 public: 57 using PHISourceT = std::pair<unsigned, MachineBasicBlock *>; 58 59 private: 60 using PHISourcesT = DenseSet<PHISourceT>; 61 using PHIInfoElementT = struct { 62 unsigned DestReg; 63 DebugLoc DL; 64 PHISourcesT Sources; 65 }; 66 using PHIInfoT = SmallPtrSet<PHIInfoElementT *, 2>; 67 PHIInfoT PHIInfo; 68 69 static unsigned phiInfoElementGetDest(PHIInfoElementT *Info); 70 static void phiInfoElementSetDef(PHIInfoElementT *Info, unsigned NewDef); 71 static PHISourcesT &phiInfoElementGetSources(PHIInfoElementT *Info); 72 static void phiInfoElementAddSource(PHIInfoElementT *Info, unsigned SourceReg, 73 MachineBasicBlock *SourceMBB); 74 static void phiInfoElementRemoveSource(PHIInfoElementT *Info, 75 unsigned SourceReg, 76 MachineBasicBlock *SourceMBB); 77 PHIInfoElementT *findPHIInfoElement(unsigned DestReg); 78 PHIInfoElementT *findPHIInfoElementFromSource(unsigned SourceReg, 79 MachineBasicBlock *SourceMBB); 80 81 public: 82 bool findSourcesFromMBB(MachineBasicBlock *SourceMBB, 83 SmallVector<unsigned, 4> &Sources); 84 void addDest(unsigned DestReg, const DebugLoc &DL); 85 void replaceDef(unsigned OldDestReg, unsigned NewDestReg); 86 void deleteDef(unsigned DestReg); 87 void addSource(unsigned DestReg, unsigned SourceReg, 88 MachineBasicBlock *SourceMBB); 89 void removeSource(unsigned DestReg, unsigned SourceReg, 90 MachineBasicBlock *SourceMBB = nullptr); 91 bool findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, 92 unsigned &DestReg); 93 bool isSource(unsigned Reg, MachineBasicBlock *SourceMBB = nullptr); 94 unsigned getNumSources(unsigned DestReg); 95 void dump(MachineRegisterInfo *MRI); 96 void clear(); 97 98 using source_iterator = PHISourcesT::iterator; 99 using dest_iterator = PHILinearizeDestIterator; 100 101 dest_iterator dests_begin(); 102 dest_iterator dests_end(); 103 104 source_iterator sources_begin(unsigned Reg); 105 source_iterator sources_end(unsigned Reg); 106 }; 107 108 class PHILinearizeDestIterator { 109 private: 110 PHILinearize::PHIInfoT::iterator Iter; 111 112 public: 113 PHILinearizeDestIterator(PHILinearize::PHIInfoT::iterator I) : Iter(I) {} 114 115 unsigned operator*() { return PHILinearize::phiInfoElementGetDest(*Iter); } 116 PHILinearizeDestIterator &operator++() { 117 ++Iter; 118 return *this; 119 } 120 bool operator==(const PHILinearizeDestIterator &I) const { 121 return I.Iter == Iter; 122 } 123 bool operator!=(const PHILinearizeDestIterator &I) const { 124 return I.Iter != Iter; 125 } 126 }; 127 128 } // end anonymous namespace 129 130 unsigned PHILinearize::phiInfoElementGetDest(PHIInfoElementT *Info) { 131 return Info->DestReg; 132 } 133 134 void PHILinearize::phiInfoElementSetDef(PHIInfoElementT *Info, 135 unsigned NewDef) { 136 Info->DestReg = NewDef; 137 } 138 139 PHILinearize::PHISourcesT & 140 PHILinearize::phiInfoElementGetSources(PHIInfoElementT *Info) { 141 return Info->Sources; 142 } 143 144 void PHILinearize::phiInfoElementAddSource(PHIInfoElementT *Info, 145 unsigned SourceReg, 146 MachineBasicBlock *SourceMBB) { 147 // Assertion ensures we don't use the same SourceMBB for the 148 // sources, because we cannot have different registers with 149 // identical predecessors, but we can have the same register for 150 // multiple predecessors. 151 #if !defined(NDEBUG) 152 for (auto SI : phiInfoElementGetSources(Info)) { 153 assert((SI.second != SourceMBB || SourceReg == SI.first)); 154 } 155 #endif 156 157 phiInfoElementGetSources(Info).insert(PHISourceT(SourceReg, SourceMBB)); 158 } 159 160 void PHILinearize::phiInfoElementRemoveSource(PHIInfoElementT *Info, 161 unsigned SourceReg, 162 MachineBasicBlock *SourceMBB) { 163 auto &Sources = phiInfoElementGetSources(Info); 164 SmallVector<PHISourceT, 4> ElimiatedSources; 165 for (auto SI : Sources) { 166 if (SI.first == SourceReg && 167 (SI.second == nullptr || SI.second == SourceMBB)) { 168 ElimiatedSources.push_back(PHISourceT(SI.first, SI.second)); 169 } 170 } 171 172 for (auto &Source : ElimiatedSources) { 173 Sources.erase(Source); 174 } 175 } 176 177 PHILinearize::PHIInfoElementT * 178 PHILinearize::findPHIInfoElement(unsigned DestReg) { 179 for (auto I : PHIInfo) { 180 if (phiInfoElementGetDest(I) == DestReg) { 181 return I; 182 } 183 } 184 return nullptr; 185 } 186 187 PHILinearize::PHIInfoElementT * 188 PHILinearize::findPHIInfoElementFromSource(unsigned SourceReg, 189 MachineBasicBlock *SourceMBB) { 190 for (auto I : PHIInfo) { 191 for (auto SI : phiInfoElementGetSources(I)) { 192 if (SI.first == SourceReg && 193 (SI.second == nullptr || SI.second == SourceMBB)) { 194 return I; 195 } 196 } 197 } 198 return nullptr; 199 } 200 201 bool PHILinearize::findSourcesFromMBB(MachineBasicBlock *SourceMBB, 202 SmallVector<unsigned, 4> &Sources) { 203 bool FoundSource = false; 204 for (auto I : PHIInfo) { 205 for (auto SI : phiInfoElementGetSources(I)) { 206 if (SI.second == SourceMBB) { 207 FoundSource = true; 208 Sources.push_back(SI.first); 209 } 210 } 211 } 212 return FoundSource; 213 } 214 215 void PHILinearize::addDest(unsigned DestReg, const DebugLoc &DL) { 216 assert(findPHIInfoElement(DestReg) == nullptr && "Dest already exsists"); 217 PHISourcesT EmptySet; 218 PHIInfoElementT *NewElement = new PHIInfoElementT(); 219 NewElement->DestReg = DestReg; 220 NewElement->DL = DL; 221 NewElement->Sources = EmptySet; 222 PHIInfo.insert(NewElement); 223 } 224 225 void PHILinearize::replaceDef(unsigned OldDestReg, unsigned NewDestReg) { 226 phiInfoElementSetDef(findPHIInfoElement(OldDestReg), NewDestReg); 227 } 228 229 void PHILinearize::deleteDef(unsigned DestReg) { 230 PHIInfoElementT *InfoElement = findPHIInfoElement(DestReg); 231 PHIInfo.erase(InfoElement); 232 delete InfoElement; 233 } 234 235 void PHILinearize::addSource(unsigned DestReg, unsigned SourceReg, 236 MachineBasicBlock *SourceMBB) { 237 phiInfoElementAddSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); 238 } 239 240 void PHILinearize::removeSource(unsigned DestReg, unsigned SourceReg, 241 MachineBasicBlock *SourceMBB) { 242 phiInfoElementRemoveSource(findPHIInfoElement(DestReg), SourceReg, SourceMBB); 243 } 244 245 bool PHILinearize::findDest(unsigned SourceReg, MachineBasicBlock *SourceMBB, 246 unsigned &DestReg) { 247 PHIInfoElementT *InfoElement = 248 findPHIInfoElementFromSource(SourceReg, SourceMBB); 249 if (InfoElement != nullptr) { 250 DestReg = phiInfoElementGetDest(InfoElement); 251 return true; 252 } 253 return false; 254 } 255 256 bool PHILinearize::isSource(unsigned Reg, MachineBasicBlock *SourceMBB) { 257 unsigned DestReg; 258 return findDest(Reg, SourceMBB, DestReg); 259 } 260 261 unsigned PHILinearize::getNumSources(unsigned DestReg) { 262 return phiInfoElementGetSources(findPHIInfoElement(DestReg)).size(); 263 } 264 265 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 266 LLVM_DUMP_METHOD void PHILinearize::dump(MachineRegisterInfo *MRI) { 267 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); 268 dbgs() << "=PHIInfo Start=\n"; 269 for (auto PII : this->PHIInfo) { 270 PHIInfoElementT &Element = *PII; 271 dbgs() << "Dest: " << printReg(Element.DestReg, TRI) 272 << " Sources: {"; 273 for (auto &SI : Element.Sources) { 274 dbgs() << printReg(SI.first, TRI) << '(' << printMBBReference(*SI.second) 275 << "),"; 276 } 277 dbgs() << "}\n"; 278 } 279 dbgs() << "=PHIInfo End=\n"; 280 } 281 #endif 282 283 void PHILinearize::clear() { PHIInfo = PHIInfoT(); } 284 285 PHILinearize::dest_iterator PHILinearize::dests_begin() { 286 return PHILinearizeDestIterator(PHIInfo.begin()); 287 } 288 289 PHILinearize::dest_iterator PHILinearize::dests_end() { 290 return PHILinearizeDestIterator(PHIInfo.end()); 291 } 292 293 PHILinearize::source_iterator PHILinearize::sources_begin(unsigned Reg) { 294 auto InfoElement = findPHIInfoElement(Reg); 295 return phiInfoElementGetSources(InfoElement).begin(); 296 } 297 298 PHILinearize::source_iterator PHILinearize::sources_end(unsigned Reg) { 299 auto InfoElement = findPHIInfoElement(Reg); 300 return phiInfoElementGetSources(InfoElement).end(); 301 } 302 303 static unsigned getPHINumInputs(MachineInstr &PHI) { 304 assert(PHI.isPHI()); 305 return (PHI.getNumOperands() - 1) / 2; 306 } 307 308 static MachineBasicBlock *getPHIPred(MachineInstr &PHI, unsigned Index) { 309 assert(PHI.isPHI()); 310 return PHI.getOperand(Index * 2 + 2).getMBB(); 311 } 312 313 static void setPhiPred(MachineInstr &PHI, unsigned Index, 314 MachineBasicBlock *NewPred) { 315 PHI.getOperand(Index * 2 + 2).setMBB(NewPred); 316 } 317 318 static unsigned getPHISourceReg(MachineInstr &PHI, unsigned Index) { 319 assert(PHI.isPHI()); 320 return PHI.getOperand(Index * 2 + 1).getReg(); 321 } 322 323 static unsigned getPHIDestReg(MachineInstr &PHI) { 324 assert(PHI.isPHI()); 325 return PHI.getOperand(0).getReg(); 326 } 327 328 namespace { 329 330 class RegionMRT; 331 class MBBMRT; 332 333 class LinearizedRegion { 334 protected: 335 MachineBasicBlock *Entry; 336 // The exit block is part of the region, and is the last 337 // merge block before exiting the region. 338 MachineBasicBlock *Exit; 339 DenseSet<unsigned> LiveOuts; 340 SmallPtrSet<MachineBasicBlock *, 1> MBBs; 341 bool HasLoop; 342 LinearizedRegion *Parent; 343 RegionMRT *RMRT; 344 345 void storeLiveOutReg(MachineBasicBlock *MBB, Register Reg, 346 MachineInstr *DefInstr, const MachineRegisterInfo *MRI, 347 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 348 349 void storeLiveOutRegRegion(RegionMRT *Region, Register Reg, 350 MachineInstr *DefInstr, 351 const MachineRegisterInfo *MRI, 352 const TargetRegisterInfo *TRI, 353 PHILinearize &PHIInfo); 354 355 void storeMBBLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 356 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, 357 RegionMRT *TopRegion); 358 359 void storeLiveOuts(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 360 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 361 362 void storeLiveOuts(RegionMRT *Region, const MachineRegisterInfo *MRI, 363 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo, 364 RegionMRT *TopRegion = nullptr); 365 366 public: 367 LinearizedRegion(); 368 LinearizedRegion(MachineBasicBlock *MBB, const MachineRegisterInfo *MRI, 369 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 370 ~LinearizedRegion() = default; 371 372 void setRegionMRT(RegionMRT *Region) { RMRT = Region; } 373 374 RegionMRT *getRegionMRT() { return RMRT; } 375 376 void setParent(LinearizedRegion *P) { Parent = P; } 377 378 LinearizedRegion *getParent() { return Parent; } 379 380 void print(raw_ostream &OS, const TargetRegisterInfo *TRI = nullptr); 381 382 void setBBSelectRegIn(unsigned Reg); 383 384 unsigned getBBSelectRegIn(); 385 386 void setBBSelectRegOut(unsigned Reg, bool IsLiveOut); 387 388 unsigned getBBSelectRegOut(); 389 390 void setHasLoop(bool Value); 391 392 bool getHasLoop(); 393 394 void addLiveOut(unsigned VReg); 395 396 void removeLiveOut(unsigned Reg); 397 398 void replaceLiveOut(unsigned OldReg, unsigned NewReg); 399 400 void replaceRegister(unsigned Register, class Register NewRegister, 401 MachineRegisterInfo *MRI, bool ReplaceInside, 402 bool ReplaceOutside, bool IncludeLoopPHIs); 403 404 void replaceRegisterInsideRegion(unsigned Register, unsigned NewRegister, 405 bool IncludeLoopPHIs, 406 MachineRegisterInfo *MRI); 407 408 void replaceRegisterOutsideRegion(unsigned Register, unsigned NewRegister, 409 bool IncludeLoopPHIs, 410 MachineRegisterInfo *MRI); 411 412 DenseSet<unsigned> *getLiveOuts(); 413 414 void setEntry(MachineBasicBlock *NewEntry); 415 416 MachineBasicBlock *getEntry(); 417 418 void setExit(MachineBasicBlock *NewExit); 419 420 MachineBasicBlock *getExit(); 421 422 void addMBB(MachineBasicBlock *MBB); 423 424 void addMBBs(LinearizedRegion *InnerRegion); 425 426 bool contains(MachineBasicBlock *MBB); 427 428 bool isLiveOut(unsigned Reg); 429 430 bool hasNoDef(unsigned Reg, MachineRegisterInfo *MRI); 431 432 void removeFalseRegisterKills(MachineRegisterInfo *MRI); 433 434 void initLiveOut(RegionMRT *Region, const MachineRegisterInfo *MRI, 435 const TargetRegisterInfo *TRI, PHILinearize &PHIInfo); 436 }; 437 438 class MRT { 439 protected: 440 RegionMRT *Parent; 441 unsigned BBSelectRegIn; 442 unsigned BBSelectRegOut; 443 444 public: 445 virtual ~MRT() = default; 446 447 unsigned getBBSelectRegIn() { return BBSelectRegIn; } 448 449 unsigned getBBSelectRegOut() { return BBSelectRegOut; } 450 451 void setBBSelectRegIn(unsigned Reg) { BBSelectRegIn = Reg; } 452 453 void setBBSelectRegOut(unsigned Reg) { BBSelectRegOut = Reg; } 454 455 virtual RegionMRT *getRegionMRT() { return nullptr; } 456 457 virtual MBBMRT *getMBBMRT() { return nullptr; } 458 459 bool isRegion() { return getRegionMRT() != nullptr; } 460 461 bool isMBB() { return getMBBMRT() != nullptr; } 462 463 bool isRoot() { return Parent == nullptr; } 464 465 void setParent(RegionMRT *Region) { Parent = Region; } 466 467 RegionMRT *getParent() { return Parent; } 468 469 static MachineBasicBlock * 470 initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, 471 DenseMap<MachineRegion *, RegionMRT *> &RegionMap); 472 473 static RegionMRT *buildMRT(MachineFunction &MF, 474 const MachineRegionInfo *RegionInfo, 475 const SIInstrInfo *TII, 476 MachineRegisterInfo *MRI); 477 478 virtual void dump(const TargetRegisterInfo *TRI, int depth = 0) = 0; 479 480 void dumpDepth(int depth) { 481 for (int i = depth; i > 0; --i) { 482 dbgs() << " "; 483 } 484 } 485 }; 486 487 class MBBMRT : public MRT { 488 MachineBasicBlock *MBB; 489 490 public: 491 MBBMRT(MachineBasicBlock *BB) : MBB(BB) { 492 setParent(nullptr); 493 setBBSelectRegOut(0); 494 setBBSelectRegIn(0); 495 } 496 497 MBBMRT *getMBBMRT() override { return this; } 498 499 MachineBasicBlock *getMBB() { return MBB; } 500 501 void dump(const TargetRegisterInfo *TRI, int depth = 0) override { 502 dumpDepth(depth); 503 dbgs() << "MBB: " << getMBB()->getNumber(); 504 dbgs() << " In: " << printReg(getBBSelectRegIn(), TRI); 505 dbgs() << ", Out: " << printReg(getBBSelectRegOut(), TRI) << "\n"; 506 } 507 }; 508 509 class RegionMRT : public MRT { 510 protected: 511 MachineRegion *Region; 512 LinearizedRegion *LRegion = nullptr; 513 MachineBasicBlock *Succ = nullptr; 514 SetVector<MRT *> Children; 515 516 public: 517 RegionMRT(MachineRegion *MachineRegion) : Region(MachineRegion) { 518 setParent(nullptr); 519 setBBSelectRegOut(0); 520 setBBSelectRegIn(0); 521 } 522 523 ~RegionMRT() override { 524 if (LRegion) { 525 delete LRegion; 526 } 527 528 for (auto CI : Children) { 529 delete &(*CI); 530 } 531 } 532 533 RegionMRT *getRegionMRT() override { return this; } 534 535 void setLinearizedRegion(LinearizedRegion *LinearizeRegion) { 536 LRegion = LinearizeRegion; 537 } 538 539 LinearizedRegion *getLinearizedRegion() { return LRegion; } 540 541 MachineRegion *getMachineRegion() { return Region; } 542 543 unsigned getInnerOutputRegister() { 544 return (*(Children.begin()))->getBBSelectRegOut(); 545 } 546 547 void addChild(MRT *Tree) { Children.insert(Tree); } 548 549 SetVector<MRT *> *getChildren() { return &Children; } 550 551 void dump(const TargetRegisterInfo *TRI, int depth = 0) override { 552 dumpDepth(depth); 553 dbgs() << "Region: " << (void *)Region; 554 dbgs() << " In: " << printReg(getBBSelectRegIn(), TRI); 555 dbgs() << ", Out: " << printReg(getBBSelectRegOut(), TRI) << "\n"; 556 557 dumpDepth(depth); 558 if (getSucc()) 559 dbgs() << "Succ: " << getSucc()->getNumber() << "\n"; 560 else 561 dbgs() << "Succ: none \n"; 562 for (auto MRTI : Children) { 563 MRTI->dump(TRI, depth + 1); 564 } 565 } 566 567 MRT *getEntryTree() { return Children.back(); } 568 569 MRT *getExitTree() { return Children.front(); } 570 571 MachineBasicBlock *getEntry() { 572 MRT *Tree = Children.back(); 573 return (Tree->isRegion()) ? Tree->getRegionMRT()->getEntry() 574 : Tree->getMBBMRT()->getMBB(); 575 } 576 577 MachineBasicBlock *getExit() { 578 MRT *Tree = Children.front(); 579 return (Tree->isRegion()) ? Tree->getRegionMRT()->getExit() 580 : Tree->getMBBMRT()->getMBB(); 581 } 582 583 void setSucc(MachineBasicBlock *MBB) { Succ = MBB; } 584 585 MachineBasicBlock *getSucc() { return Succ; } 586 587 bool contains(MachineBasicBlock *MBB) { 588 for (auto CI : Children) { 589 if (CI->isMBB()) { 590 if (MBB == CI->getMBBMRT()->getMBB()) { 591 return true; 592 } 593 } else { 594 if (CI->getRegionMRT()->contains(MBB)) { 595 return true; 596 } else if (CI->getRegionMRT()->getLinearizedRegion() != nullptr && 597 CI->getRegionMRT()->getLinearizedRegion()->contains(MBB)) { 598 return true; 599 } 600 } 601 } 602 return false; 603 } 604 605 void replaceLiveOutReg(unsigned Register, unsigned NewRegister) { 606 LinearizedRegion *LRegion = getLinearizedRegion(); 607 LRegion->replaceLiveOut(Register, NewRegister); 608 for (auto &CI : Children) { 609 if (CI->isRegion()) { 610 CI->getRegionMRT()->replaceLiveOutReg(Register, NewRegister); 611 } 612 } 613 } 614 }; 615 616 } // end anonymous namespace 617 618 static unsigned createBBSelectReg(const SIInstrInfo *TII, 619 MachineRegisterInfo *MRI) { 620 return MRI->createVirtualRegister(TII->getPreferredSelectRegClass(32)); 621 } 622 623 MachineBasicBlock * 624 MRT::initializeMRT(MachineFunction &MF, const MachineRegionInfo *RegionInfo, 625 DenseMap<MachineRegion *, RegionMRT *> &RegionMap) { 626 for (auto &MFI : MF) { 627 MachineBasicBlock *ExitMBB = &MFI; 628 if (ExitMBB->succ_size() == 0) { 629 return ExitMBB; 630 } 631 } 632 llvm_unreachable("CFG has no exit block"); 633 return nullptr; 634 } 635 636 RegionMRT *MRT::buildMRT(MachineFunction &MF, 637 const MachineRegionInfo *RegionInfo, 638 const SIInstrInfo *TII, MachineRegisterInfo *MRI) { 639 SmallPtrSet<MachineRegion *, 4> PlacedRegions; 640 DenseMap<MachineRegion *, RegionMRT *> RegionMap; 641 MachineRegion *TopLevelRegion = RegionInfo->getTopLevelRegion(); 642 RegionMRT *Result = new RegionMRT(TopLevelRegion); 643 RegionMap[TopLevelRegion] = Result; 644 645 // Insert the exit block first, we need it to be the merge node 646 // for the top level region. 647 MachineBasicBlock *Exit = initializeMRT(MF, RegionInfo, RegionMap); 648 649 unsigned BBSelectRegIn = createBBSelectReg(TII, MRI); 650 MBBMRT *ExitMRT = new MBBMRT(Exit); 651 RegionMap[RegionInfo->getRegionFor(Exit)]->addChild(ExitMRT); 652 ExitMRT->setBBSelectRegIn(BBSelectRegIn); 653 654 for (auto MBBI : post_order(&(MF.front()))) { 655 MachineBasicBlock *MBB = &(*MBBI); 656 657 // Skip Exit since we already added it 658 if (MBB == Exit) { 659 continue; 660 } 661 662 LLVM_DEBUG(dbgs() << "Visiting " << printMBBReference(*MBB) << "\n"); 663 MBBMRT *NewMBB = new MBBMRT(MBB); 664 MachineRegion *Region = RegionInfo->getRegionFor(MBB); 665 666 // Ensure we have the MRT region 667 if (RegionMap.count(Region) == 0) { 668 RegionMRT *NewMRTRegion = new RegionMRT(Region); 669 RegionMap[Region] = NewMRTRegion; 670 671 // Ensure all parents are in the RegionMap 672 MachineRegion *Parent = Region->getParent(); 673 while (RegionMap.count(Parent) == 0) { 674 RegionMRT *NewMRTParent = new RegionMRT(Parent); 675 NewMRTParent->addChild(NewMRTRegion); 676 NewMRTRegion->setParent(NewMRTParent); 677 RegionMap[Parent] = NewMRTParent; 678 NewMRTRegion = NewMRTParent; 679 Parent = Parent->getParent(); 680 } 681 RegionMap[Parent]->addChild(NewMRTRegion); 682 NewMRTRegion->setParent(RegionMap[Parent]); 683 } 684 685 // Add MBB to Region MRT 686 RegionMap[Region]->addChild(NewMBB); 687 NewMBB->setParent(RegionMap[Region]); 688 RegionMap[Region]->setSucc(Region->getExit()); 689 } 690 return Result; 691 } 692 693 void LinearizedRegion::storeLiveOutReg(MachineBasicBlock *MBB, Register Reg, 694 MachineInstr *DefInstr, 695 const MachineRegisterInfo *MRI, 696 const TargetRegisterInfo *TRI, 697 PHILinearize &PHIInfo) { 698 if (Reg.isVirtual()) { 699 LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI) 700 << "\n"); 701 // If this is a source register to a PHI we are chaining, it 702 // must be live out. 703 if (PHIInfo.isSource(Reg)) { 704 LLVM_DEBUG(dbgs() << "Add LiveOut (PHI): " << printReg(Reg, TRI) << "\n"); 705 addLiveOut(Reg); 706 } else { 707 // If this is live out of the MBB 708 for (auto &UI : MRI->use_operands(Reg)) { 709 if (UI.getParent()->getParent() != MBB) { 710 LLVM_DEBUG(dbgs() << "Add LiveOut (MBB " << printMBBReference(*MBB) 711 << "): " << printReg(Reg, TRI) << "\n"); 712 addLiveOut(Reg); 713 } else { 714 // If the use is in the same MBB we have to make sure 715 // it is after the def, otherwise it is live out in a loop 716 MachineInstr *UseInstr = UI.getParent(); 717 for (MachineBasicBlock::instr_iterator 718 MII = UseInstr->getIterator(), 719 MIE = UseInstr->getParent()->instr_end(); 720 MII != MIE; ++MII) { 721 if ((&(*MII)) == DefInstr) { 722 LLVM_DEBUG(dbgs() << "Add LiveOut (Loop): " << printReg(Reg, TRI) 723 << "\n"); 724 addLiveOut(Reg); 725 } 726 } 727 } 728 } 729 } 730 } 731 } 732 733 void LinearizedRegion::storeLiveOutRegRegion(RegionMRT *Region, Register Reg, 734 MachineInstr *DefInstr, 735 const MachineRegisterInfo *MRI, 736 const TargetRegisterInfo *TRI, 737 PHILinearize &PHIInfo) { 738 if (Reg.isVirtual()) { 739 LLVM_DEBUG(dbgs() << "Considering Register: " << printReg(Reg, TRI) 740 << "\n"); 741 for (auto &UI : MRI->use_operands(Reg)) { 742 if (!Region->contains(UI.getParent()->getParent())) { 743 LLVM_DEBUG(dbgs() << "Add LiveOut (Region " << (void *)Region 744 << "): " << printReg(Reg, TRI) << "\n"); 745 addLiveOut(Reg); 746 } 747 } 748 } 749 } 750 751 void LinearizedRegion::storeLiveOuts(MachineBasicBlock *MBB, 752 const MachineRegisterInfo *MRI, 753 const TargetRegisterInfo *TRI, 754 PHILinearize &PHIInfo) { 755 LLVM_DEBUG(dbgs() << "-Store Live Outs Begin (" << printMBBReference(*MBB) 756 << ")-\n"); 757 for (auto &II : *MBB) { 758 for (auto &RI : II.defs()) { 759 storeLiveOutReg(MBB, RI.getReg(), RI.getParent(), MRI, TRI, PHIInfo); 760 } 761 for (auto &IRI : II.implicit_operands()) { 762 if (IRI.isDef()) { 763 storeLiveOutReg(MBB, IRI.getReg(), IRI.getParent(), MRI, TRI, PHIInfo); 764 } 765 } 766 } 767 768 // If we have a successor with a PHI, source coming from this MBB we have to 769 // add the register as live out 770 for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), 771 E = MBB->succ_end(); 772 SI != E; ++SI) { 773 for (auto &II : *(*SI)) { 774 if (II.isPHI()) { 775 MachineInstr &PHI = II; 776 int numPreds = getPHINumInputs(PHI); 777 for (int i = 0; i < numPreds; ++i) { 778 if (getPHIPred(PHI, i) == MBB) { 779 unsigned PHIReg = getPHISourceReg(PHI, i); 780 LLVM_DEBUG(dbgs() 781 << "Add LiveOut (PhiSource " << printMBBReference(*MBB) 782 << " -> " << printMBBReference(*(*SI)) 783 << "): " << printReg(PHIReg, TRI) << "\n"); 784 addLiveOut(PHIReg); 785 } 786 } 787 } 788 } 789 } 790 791 LLVM_DEBUG(dbgs() << "-Store Live Outs Endn-\n"); 792 } 793 794 void LinearizedRegion::storeMBBLiveOuts(MachineBasicBlock *MBB, 795 const MachineRegisterInfo *MRI, 796 const TargetRegisterInfo *TRI, 797 PHILinearize &PHIInfo, 798 RegionMRT *TopRegion) { 799 for (auto &II : *MBB) { 800 for (auto &RI : II.defs()) { 801 storeLiveOutRegRegion(TopRegion, RI.getReg(), RI.getParent(), MRI, TRI, 802 PHIInfo); 803 } 804 for (auto &IRI : II.implicit_operands()) { 805 if (IRI.isDef()) { 806 storeLiveOutRegRegion(TopRegion, IRI.getReg(), IRI.getParent(), MRI, 807 TRI, PHIInfo); 808 } 809 } 810 } 811 } 812 813 void LinearizedRegion::storeLiveOuts(RegionMRT *Region, 814 const MachineRegisterInfo *MRI, 815 const TargetRegisterInfo *TRI, 816 PHILinearize &PHIInfo, 817 RegionMRT *CurrentTopRegion) { 818 MachineBasicBlock *Exit = Region->getSucc(); 819 820 RegionMRT *TopRegion = 821 CurrentTopRegion == nullptr ? Region : CurrentTopRegion; 822 823 // Check if exit is end of function, if so, no live outs. 824 if (Exit == nullptr) 825 return; 826 827 auto Children = Region->getChildren(); 828 for (auto CI : *Children) { 829 if (CI->isMBB()) { 830 auto MBB = CI->getMBBMRT()->getMBB(); 831 storeMBBLiveOuts(MBB, MRI, TRI, PHIInfo, TopRegion); 832 } else { 833 LinearizedRegion *SubRegion = CI->getRegionMRT()->getLinearizedRegion(); 834 // We should be limited to only store registers that are live out from the 835 // lineaized region 836 for (auto MBBI : SubRegion->MBBs) { 837 storeMBBLiveOuts(MBBI, MRI, TRI, PHIInfo, TopRegion); 838 } 839 } 840 } 841 842 if (CurrentTopRegion == nullptr) { 843 auto Succ = Region->getSucc(); 844 for (auto &II : *Succ) { 845 if (II.isPHI()) { 846 MachineInstr &PHI = II; 847 int numPreds = getPHINumInputs(PHI); 848 for (int i = 0; i < numPreds; ++i) { 849 if (Region->contains(getPHIPred(PHI, i))) { 850 unsigned PHIReg = getPHISourceReg(PHI, i); 851 LLVM_DEBUG(dbgs() << "Add Region LiveOut (" << (void *)Region 852 << "): " << printReg(PHIReg, TRI) << "\n"); 853 addLiveOut(PHIReg); 854 } 855 } 856 } 857 } 858 } 859 } 860 861 #ifndef NDEBUG 862 void LinearizedRegion::print(raw_ostream &OS, const TargetRegisterInfo *TRI) { 863 OS << "Linearized Region {"; 864 bool IsFirst = true; 865 for (auto MBB : MBBs) { 866 if (IsFirst) { 867 IsFirst = false; 868 } else { 869 OS << " ,"; 870 } 871 OS << MBB->getNumber(); 872 } 873 OS << "} (" << Entry->getNumber() << ", " 874 << (Exit == nullptr ? -1 : Exit->getNumber()) 875 << "): In:" << printReg(getBBSelectRegIn(), TRI) 876 << " Out:" << printReg(getBBSelectRegOut(), TRI) << " {"; 877 for (auto &LI : LiveOuts) { 878 OS << printReg(LI, TRI) << " "; 879 } 880 OS << "} \n"; 881 } 882 #endif 883 884 unsigned LinearizedRegion::getBBSelectRegIn() { 885 return getRegionMRT()->getBBSelectRegIn(); 886 } 887 888 unsigned LinearizedRegion::getBBSelectRegOut() { 889 return getRegionMRT()->getBBSelectRegOut(); 890 } 891 892 void LinearizedRegion::setHasLoop(bool Value) { HasLoop = Value; } 893 894 bool LinearizedRegion::getHasLoop() { return HasLoop; } 895 896 void LinearizedRegion::addLiveOut(unsigned VReg) { LiveOuts.insert(VReg); } 897 898 void LinearizedRegion::removeLiveOut(unsigned Reg) { 899 if (isLiveOut(Reg)) 900 LiveOuts.erase(Reg); 901 } 902 903 void LinearizedRegion::replaceLiveOut(unsigned OldReg, unsigned NewReg) { 904 if (isLiveOut(OldReg)) { 905 removeLiveOut(OldReg); 906 addLiveOut(NewReg); 907 } 908 } 909 910 void LinearizedRegion::replaceRegister(unsigned Register, 911 class Register NewRegister, 912 MachineRegisterInfo *MRI, 913 bool ReplaceInside, bool ReplaceOutside, 914 bool IncludeLoopPHI) { 915 assert(Register != NewRegister && "Cannot replace a reg with itself"); 916 917 LLVM_DEBUG( 918 dbgs() << "Pepareing to replace register (region): " 919 << printReg(Register, MRI->getTargetRegisterInfo()) << " with " 920 << printReg(NewRegister, MRI->getTargetRegisterInfo()) << "\n"); 921 922 // If we are replacing outside, we also need to update the LiveOuts 923 if (ReplaceOutside && 924 (isLiveOut(Register) || this->getParent()->isLiveOut(Register))) { 925 LinearizedRegion *Current = this; 926 while (Current != nullptr && Current->getEntry() != nullptr) { 927 LLVM_DEBUG(dbgs() << "Region before register replace\n"); 928 LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); 929 Current->replaceLiveOut(Register, NewRegister); 930 LLVM_DEBUG(dbgs() << "Region after register replace\n"); 931 LLVM_DEBUG(Current->print(dbgs(), MRI->getTargetRegisterInfo())); 932 Current = Current->getParent(); 933 } 934 } 935 936 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), 937 E = MRI->reg_end(); 938 I != E;) { 939 MachineOperand &O = *I; 940 ++I; 941 942 // We don't rewrite defs. 943 if (O.isDef()) 944 continue; 945 946 bool IsInside = contains(O.getParent()->getParent()); 947 bool IsLoopPHI = IsInside && (O.getParent()->isPHI() && 948 O.getParent()->getParent() == getEntry()); 949 bool ShouldReplace = (IsInside && ReplaceInside) || 950 (!IsInside && ReplaceOutside) || 951 (IncludeLoopPHI && IsLoopPHI); 952 if (ShouldReplace) { 953 954 if (NewRegister.isPhysical()) { 955 LLVM_DEBUG(dbgs() << "Trying to substitute physical register: " 956 << printReg(NewRegister, MRI->getTargetRegisterInfo()) 957 << "\n"); 958 llvm_unreachable("Cannot substitute physical registers"); 959 } else { 960 LLVM_DEBUG(dbgs() << "Replacing register (region): " 961 << printReg(Register, MRI->getTargetRegisterInfo()) 962 << " with " 963 << printReg(NewRegister, MRI->getTargetRegisterInfo()) 964 << "\n"); 965 O.setReg(NewRegister); 966 } 967 } 968 } 969 } 970 971 void LinearizedRegion::replaceRegisterInsideRegion(unsigned Register, 972 unsigned NewRegister, 973 bool IncludeLoopPHIs, 974 MachineRegisterInfo *MRI) { 975 replaceRegister(Register, NewRegister, MRI, true, false, IncludeLoopPHIs); 976 } 977 978 void LinearizedRegion::replaceRegisterOutsideRegion(unsigned Register, 979 unsigned NewRegister, 980 bool IncludeLoopPHIs, 981 MachineRegisterInfo *MRI) { 982 replaceRegister(Register, NewRegister, MRI, false, true, IncludeLoopPHIs); 983 } 984 985 DenseSet<unsigned> *LinearizedRegion::getLiveOuts() { return &LiveOuts; } 986 987 void LinearizedRegion::setEntry(MachineBasicBlock *NewEntry) { 988 Entry = NewEntry; 989 } 990 991 MachineBasicBlock *LinearizedRegion::getEntry() { return Entry; } 992 993 void LinearizedRegion::setExit(MachineBasicBlock *NewExit) { Exit = NewExit; } 994 995 MachineBasicBlock *LinearizedRegion::getExit() { return Exit; } 996 997 void LinearizedRegion::addMBB(MachineBasicBlock *MBB) { MBBs.insert(MBB); } 998 999 void LinearizedRegion::addMBBs(LinearizedRegion *InnerRegion) { 1000 for (auto MBB : InnerRegion->MBBs) { 1001 addMBB(MBB); 1002 } 1003 } 1004 1005 bool LinearizedRegion::contains(MachineBasicBlock *MBB) { 1006 return MBBs.count(MBB) == 1; 1007 } 1008 1009 bool LinearizedRegion::isLiveOut(unsigned Reg) { 1010 return LiveOuts.count(Reg) == 1; 1011 } 1012 1013 bool LinearizedRegion::hasNoDef(unsigned Reg, MachineRegisterInfo *MRI) { 1014 return MRI->def_begin(Reg) == MRI->def_end(); 1015 } 1016 1017 // After the code has been structurized, what was flagged as kills 1018 // before are no longer register kills. 1019 void LinearizedRegion::removeFalseRegisterKills(MachineRegisterInfo *MRI) { 1020 const TargetRegisterInfo *TRI = MRI->getTargetRegisterInfo(); 1021 (void)TRI; // It's used by LLVM_DEBUG. 1022 1023 for (auto MBBI : MBBs) { 1024 MachineBasicBlock *MBB = MBBI; 1025 for (auto &II : *MBB) { 1026 for (auto &RI : II.uses()) { 1027 if (RI.isReg()) { 1028 Register Reg = RI.getReg(); 1029 if (Reg.isVirtual()) { 1030 if (hasNoDef(Reg, MRI)) 1031 continue; 1032 if (!MRI->hasOneDef(Reg)) { 1033 LLVM_DEBUG(this->getEntry()->getParent()->dump()); 1034 LLVM_DEBUG(dbgs() << printReg(Reg, TRI) << "\n"); 1035 } 1036 1037 if (MRI->def_begin(Reg) == MRI->def_end()) { 1038 LLVM_DEBUG(dbgs() << "Register " 1039 << printReg(Reg, MRI->getTargetRegisterInfo()) 1040 << " has NO defs\n"); 1041 } else if (!MRI->hasOneDef(Reg)) { 1042 LLVM_DEBUG(dbgs() << "Register " 1043 << printReg(Reg, MRI->getTargetRegisterInfo()) 1044 << " has multiple defs\n"); 1045 } 1046 1047 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); 1048 MachineOperand *Def = &(*(MRI->def_begin(Reg))); 1049 MachineOperand *UseOperand = &(RI); 1050 bool UseIsOutsideDefMBB = Def->getParent()->getParent() != MBB; 1051 if (UseIsOutsideDefMBB && UseOperand->isKill()) { 1052 LLVM_DEBUG(dbgs() << "Removing kill flag on register: " 1053 << printReg(Reg, TRI) << "\n"); 1054 UseOperand->setIsKill(false); 1055 } 1056 } 1057 } 1058 } 1059 } 1060 } 1061 } 1062 1063 void LinearizedRegion::initLiveOut(RegionMRT *Region, 1064 const MachineRegisterInfo *MRI, 1065 const TargetRegisterInfo *TRI, 1066 PHILinearize &PHIInfo) { 1067 storeLiveOuts(Region, MRI, TRI, PHIInfo); 1068 } 1069 1070 LinearizedRegion::LinearizedRegion(MachineBasicBlock *MBB, 1071 const MachineRegisterInfo *MRI, 1072 const TargetRegisterInfo *TRI, 1073 PHILinearize &PHIInfo) { 1074 setEntry(MBB); 1075 setExit(MBB); 1076 storeLiveOuts(MBB, MRI, TRI, PHIInfo); 1077 MBBs.insert(MBB); 1078 Parent = nullptr; 1079 } 1080 1081 LinearizedRegion::LinearizedRegion() { 1082 setEntry(nullptr); 1083 setExit(nullptr); 1084 Parent = nullptr; 1085 } 1086 1087 namespace { 1088 1089 class AMDGPUMachineCFGStructurizer : public MachineFunctionPass { 1090 private: 1091 const MachineRegionInfo *Regions; 1092 const SIInstrInfo *TII; 1093 const TargetRegisterInfo *TRI; 1094 MachineRegisterInfo *MRI; 1095 unsigned BBSelectRegister; 1096 PHILinearize PHIInfo; 1097 DenseMap<MachineBasicBlock *, MachineBasicBlock *> FallthroughMap; 1098 RegionMRT *RMRT; 1099 1100 void getPHIRegionIndices(RegionMRT *Region, MachineInstr &PHI, 1101 SmallVector<unsigned, 2> &RegionIndices); 1102 void getPHIRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, 1103 SmallVector<unsigned, 2> &RegionIndices); 1104 void getPHINonRegionIndices(LinearizedRegion *Region, MachineInstr &PHI, 1105 SmallVector<unsigned, 2> &PHINonRegionIndices); 1106 1107 void storePHILinearizationInfoDest( 1108 unsigned LDestReg, MachineInstr &PHI, 1109 SmallVector<unsigned, 2> *RegionIndices = nullptr); 1110 1111 unsigned storePHILinearizationInfo(MachineInstr &PHI, 1112 SmallVector<unsigned, 2> *RegionIndices); 1113 1114 void extractKilledPHIs(MachineBasicBlock *MBB); 1115 1116 bool shrinkPHI(MachineInstr &PHI, SmallVector<unsigned, 2> &PHIIndices, 1117 unsigned *ReplaceReg); 1118 1119 bool shrinkPHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1120 MachineBasicBlock *SourceMBB, 1121 SmallVector<unsigned, 2> &PHIIndices, unsigned *ReplaceReg); 1122 1123 void replacePHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1124 MachineBasicBlock *LastMerge, 1125 SmallVector<unsigned, 2> &PHIRegionIndices); 1126 void replaceEntryPHI(MachineInstr &PHI, unsigned CombinedSourceReg, 1127 MachineBasicBlock *IfMBB, 1128 SmallVector<unsigned, 2> &PHIRegionIndices); 1129 void replaceLiveOutRegs(MachineInstr &PHI, 1130 SmallVector<unsigned, 2> &PHIRegionIndices, 1131 unsigned CombinedSourceReg, 1132 LinearizedRegion *LRegion); 1133 void rewriteRegionExitPHI(RegionMRT *Region, MachineBasicBlock *LastMerge, 1134 MachineInstr &PHI, LinearizedRegion *LRegion); 1135 1136 void rewriteRegionExitPHIs(RegionMRT *Region, MachineBasicBlock *LastMerge, 1137 LinearizedRegion *LRegion); 1138 void rewriteRegionEntryPHI(LinearizedRegion *Region, MachineBasicBlock *IfMBB, 1139 MachineInstr &PHI); 1140 void rewriteRegionEntryPHIs(LinearizedRegion *Region, 1141 MachineBasicBlock *IfMBB); 1142 1143 bool regionIsSimpleIf(RegionMRT *Region); 1144 1145 void transformSimpleIfRegion(RegionMRT *Region); 1146 1147 void eliminateDeadBranchOperands(MachineBasicBlock::instr_iterator &II); 1148 1149 void insertUnconditionalBranch(MachineBasicBlock *MBB, 1150 MachineBasicBlock *Dest, 1151 const DebugLoc &DL = DebugLoc()); 1152 1153 MachineBasicBlock *createLinearizedExitBlock(RegionMRT *Region); 1154 1155 void insertMergePHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1156 MachineBasicBlock *MergeBB, unsigned DestRegister, 1157 unsigned IfSourceRegister, unsigned CodeSourceRegister, 1158 bool IsUndefIfSource = false); 1159 1160 MachineBasicBlock *createIfBlock(MachineBasicBlock *MergeBB, 1161 MachineBasicBlock *CodeBBStart, 1162 MachineBasicBlock *CodeBBEnd, 1163 MachineBasicBlock *SelectBB, unsigned IfReg, 1164 bool InheritPreds); 1165 1166 void prunePHIInfo(MachineBasicBlock *MBB); 1167 void createEntryPHI(LinearizedRegion *CurrentRegion, unsigned DestReg); 1168 1169 void createEntryPHIs(LinearizedRegion *CurrentRegion); 1170 void resolvePHIInfos(MachineBasicBlock *FunctionEntry); 1171 1172 void replaceRegisterWith(unsigned Register, class Register NewRegister); 1173 1174 MachineBasicBlock *createIfRegion(MachineBasicBlock *MergeBB, 1175 MachineBasicBlock *CodeBB, 1176 LinearizedRegion *LRegion, 1177 unsigned BBSelectRegIn, 1178 unsigned BBSelectRegOut); 1179 1180 MachineBasicBlock * 1181 createIfRegion(MachineBasicBlock *MergeMBB, LinearizedRegion *InnerRegion, 1182 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, 1183 unsigned BBSelectRegIn, unsigned BBSelectRegOut); 1184 void ensureCondIsNotKilled(SmallVector<MachineOperand, 1> Cond); 1185 1186 void rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, 1187 MachineBasicBlock *MergeBB, 1188 unsigned BBSelectReg); 1189 1190 MachineInstr *getDefInstr(unsigned Reg); 1191 void insertChainedPHI(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1192 MachineBasicBlock *MergeBB, 1193 LinearizedRegion *InnerRegion, unsigned DestReg, 1194 unsigned SourceReg); 1195 bool containsDef(MachineBasicBlock *MBB, LinearizedRegion *InnerRegion, 1196 unsigned Register); 1197 void rewriteLiveOutRegs(MachineBasicBlock *IfBB, MachineBasicBlock *CodeBB, 1198 MachineBasicBlock *MergeBB, 1199 LinearizedRegion *InnerRegion, 1200 LinearizedRegion *LRegion); 1201 1202 void splitLoopPHI(MachineInstr &PHI, MachineBasicBlock *Entry, 1203 MachineBasicBlock *EntrySucc, LinearizedRegion *LRegion); 1204 void splitLoopPHIs(MachineBasicBlock *Entry, MachineBasicBlock *EntrySucc, 1205 LinearizedRegion *LRegion); 1206 1207 MachineBasicBlock *splitExit(LinearizedRegion *LRegion); 1208 1209 MachineBasicBlock *splitEntry(LinearizedRegion *LRegion); 1210 1211 LinearizedRegion *initLinearizedRegion(RegionMRT *Region); 1212 1213 bool structurizeComplexRegion(RegionMRT *Region); 1214 1215 bool structurizeRegion(RegionMRT *Region); 1216 1217 bool structurizeRegions(RegionMRT *Region, bool isTopRegion); 1218 1219 public: 1220 static char ID; 1221 1222 AMDGPUMachineCFGStructurizer() : MachineFunctionPass(ID) { 1223 initializeAMDGPUMachineCFGStructurizerPass(*PassRegistry::getPassRegistry()); 1224 } 1225 1226 void getAnalysisUsage(AnalysisUsage &AU) const override { 1227 AU.addRequired<MachineRegionInfoPass>(); 1228 MachineFunctionPass::getAnalysisUsage(AU); 1229 } 1230 1231 void initFallthroughMap(MachineFunction &MF); 1232 1233 void createLinearizedRegion(RegionMRT *Region, unsigned SelectOut); 1234 1235 unsigned initializeSelectRegisters(MRT *MRT, unsigned ExistingExitReg, 1236 MachineRegisterInfo *MRI, 1237 const SIInstrInfo *TII); 1238 1239 void setRegionMRT(RegionMRT *RegionTree) { RMRT = RegionTree; } 1240 1241 RegionMRT *getRegionMRT() { return RMRT; } 1242 1243 bool runOnMachineFunction(MachineFunction &MF) override; 1244 }; 1245 1246 } // end anonymous namespace 1247 1248 char AMDGPUMachineCFGStructurizer::ID = 0; 1249 1250 bool AMDGPUMachineCFGStructurizer::regionIsSimpleIf(RegionMRT *Region) { 1251 MachineBasicBlock *Entry = Region->getEntry(); 1252 MachineBasicBlock *Succ = Region->getSucc(); 1253 bool FoundBypass = false; 1254 bool FoundIf = false; 1255 1256 if (Entry->succ_size() != 2) { 1257 return false; 1258 } 1259 1260 for (MachineBasicBlock::const_succ_iterator SI = Entry->succ_begin(), 1261 E = Entry->succ_end(); 1262 SI != E; ++SI) { 1263 MachineBasicBlock *Current = *SI; 1264 1265 if (Current == Succ) { 1266 FoundBypass = true; 1267 } else if ((Current->succ_size() == 1) && 1268 *(Current->succ_begin()) == Succ) { 1269 FoundIf = true; 1270 } 1271 } 1272 1273 return FoundIf && FoundBypass; 1274 } 1275 1276 void AMDGPUMachineCFGStructurizer::transformSimpleIfRegion(RegionMRT *Region) { 1277 MachineBasicBlock *Entry = Region->getEntry(); 1278 MachineBasicBlock *Exit = Region->getExit(); 1279 TII->convertNonUniformIfRegion(Entry, Exit); 1280 } 1281 1282 static void fixMBBTerminator(MachineBasicBlock *MBB) { 1283 if (MBB->succ_size() == 1) { 1284 auto *Succ = *(MBB->succ_begin()); 1285 for (auto &TI : MBB->terminators()) { 1286 for (auto &UI : TI.uses()) { 1287 if (UI.isMBB() && UI.getMBB() != Succ) { 1288 UI.setMBB(Succ); 1289 } 1290 } 1291 } 1292 } 1293 } 1294 1295 static void fixRegionTerminator(RegionMRT *Region) { 1296 MachineBasicBlock *InternalSucc = nullptr; 1297 MachineBasicBlock *ExternalSucc = nullptr; 1298 LinearizedRegion *LRegion = Region->getLinearizedRegion(); 1299 auto Exit = LRegion->getExit(); 1300 1301 SmallPtrSet<MachineBasicBlock *, 2> Successors; 1302 for (MachineBasicBlock::const_succ_iterator SI = Exit->succ_begin(), 1303 SE = Exit->succ_end(); 1304 SI != SE; ++SI) { 1305 MachineBasicBlock *Succ = *SI; 1306 if (LRegion->contains(Succ)) { 1307 // Do not allow re-assign 1308 assert(InternalSucc == nullptr); 1309 InternalSucc = Succ; 1310 } else { 1311 // Do not allow re-assign 1312 assert(ExternalSucc == nullptr); 1313 ExternalSucc = Succ; 1314 } 1315 } 1316 1317 for (auto &TI : Exit->terminators()) { 1318 for (auto &UI : TI.uses()) { 1319 if (UI.isMBB()) { 1320 auto Target = UI.getMBB(); 1321 if (Target != InternalSucc && Target != ExternalSucc) { 1322 UI.setMBB(ExternalSucc); 1323 } 1324 } 1325 } 1326 } 1327 } 1328 1329 // If a region region is just a sequence of regions (and the exit 1330 // block in the case of the top level region), we can simply skip 1331 // linearizing it, because it is already linear 1332 bool regionIsSequence(RegionMRT *Region) { 1333 auto Children = Region->getChildren(); 1334 for (auto CI : *Children) { 1335 if (!CI->isRegion()) { 1336 if (CI->getMBBMRT()->getMBB()->succ_size() > 1) { 1337 return false; 1338 } 1339 } 1340 } 1341 return true; 1342 } 1343 1344 void fixupRegionExits(RegionMRT *Region) { 1345 auto Children = Region->getChildren(); 1346 for (auto CI : *Children) { 1347 if (!CI->isRegion()) { 1348 fixMBBTerminator(CI->getMBBMRT()->getMBB()); 1349 } else { 1350 fixRegionTerminator(CI->getRegionMRT()); 1351 } 1352 } 1353 } 1354 1355 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( 1356 RegionMRT *Region, MachineInstr &PHI, 1357 SmallVector<unsigned, 2> &PHIRegionIndices) { 1358 unsigned NumInputs = getPHINumInputs(PHI); 1359 for (unsigned i = 0; i < NumInputs; ++i) { 1360 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1361 if (Region->contains(Pred)) { 1362 PHIRegionIndices.push_back(i); 1363 } 1364 } 1365 } 1366 1367 void AMDGPUMachineCFGStructurizer::getPHIRegionIndices( 1368 LinearizedRegion *Region, MachineInstr &PHI, 1369 SmallVector<unsigned, 2> &PHIRegionIndices) { 1370 unsigned NumInputs = getPHINumInputs(PHI); 1371 for (unsigned i = 0; i < NumInputs; ++i) { 1372 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1373 if (Region->contains(Pred)) { 1374 PHIRegionIndices.push_back(i); 1375 } 1376 } 1377 } 1378 1379 void AMDGPUMachineCFGStructurizer::getPHINonRegionIndices( 1380 LinearizedRegion *Region, MachineInstr &PHI, 1381 SmallVector<unsigned, 2> &PHINonRegionIndices) { 1382 unsigned NumInputs = getPHINumInputs(PHI); 1383 for (unsigned i = 0; i < NumInputs; ++i) { 1384 MachineBasicBlock *Pred = getPHIPred(PHI, i); 1385 if (!Region->contains(Pred)) { 1386 PHINonRegionIndices.push_back(i); 1387 } 1388 } 1389 } 1390 1391 void AMDGPUMachineCFGStructurizer::storePHILinearizationInfoDest( 1392 unsigned LDestReg, MachineInstr &PHI, 1393 SmallVector<unsigned, 2> *RegionIndices) { 1394 if (RegionIndices) { 1395 for (auto i : *RegionIndices) { 1396 PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); 1397 } 1398 } else { 1399 unsigned NumInputs = getPHINumInputs(PHI); 1400 for (unsigned i = 0; i < NumInputs; ++i) { 1401 PHIInfo.addSource(LDestReg, getPHISourceReg(PHI, i), getPHIPred(PHI, i)); 1402 } 1403 } 1404 } 1405 1406 unsigned AMDGPUMachineCFGStructurizer::storePHILinearizationInfo( 1407 MachineInstr &PHI, SmallVector<unsigned, 2> *RegionIndices) { 1408 unsigned DestReg = getPHIDestReg(PHI); 1409 Register LinearizeDestReg = 1410 MRI->createVirtualRegister(MRI->getRegClass(DestReg)); 1411 PHIInfo.addDest(LinearizeDestReg, PHI.getDebugLoc()); 1412 storePHILinearizationInfoDest(LinearizeDestReg, PHI, RegionIndices); 1413 return LinearizeDestReg; 1414 } 1415 1416 void AMDGPUMachineCFGStructurizer::extractKilledPHIs(MachineBasicBlock *MBB) { 1417 // We need to create a new chain for the killed phi, but there is no 1418 // need to do the renaming outside or inside the block. 1419 SmallPtrSet<MachineInstr *, 2> PHIs; 1420 for (MachineBasicBlock::instr_iterator I = MBB->instr_begin(), 1421 E = MBB->instr_end(); 1422 I != E; ++I) { 1423 MachineInstr &Instr = *I; 1424 if (Instr.isPHI()) { 1425 unsigned PHIDestReg = getPHIDestReg(Instr); 1426 LLVM_DEBUG(dbgs() << "Extractking killed phi:\n"); 1427 LLVM_DEBUG(Instr.dump()); 1428 PHIs.insert(&Instr); 1429 PHIInfo.addDest(PHIDestReg, Instr.getDebugLoc()); 1430 storePHILinearizationInfoDest(PHIDestReg, Instr); 1431 } 1432 } 1433 1434 for (auto PI : PHIs) { 1435 PI->eraseFromParent(); 1436 } 1437 } 1438 1439 static bool isPHIRegionIndex(SmallVector<unsigned, 2> PHIRegionIndices, 1440 unsigned Index) { 1441 for (auto i : PHIRegionIndices) { 1442 if (i == Index) 1443 return true; 1444 } 1445 return false; 1446 } 1447 1448 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, 1449 SmallVector<unsigned, 2> &PHIIndices, 1450 unsigned *ReplaceReg) { 1451 return shrinkPHI(PHI, 0, nullptr, PHIIndices, ReplaceReg); 1452 } 1453 1454 bool AMDGPUMachineCFGStructurizer::shrinkPHI(MachineInstr &PHI, 1455 unsigned CombinedSourceReg, 1456 MachineBasicBlock *SourceMBB, 1457 SmallVector<unsigned, 2> &PHIIndices, 1458 unsigned *ReplaceReg) { 1459 LLVM_DEBUG(dbgs() << "Shrink PHI: "); 1460 LLVM_DEBUG(PHI.dump()); 1461 LLVM_DEBUG(dbgs() << " to " << printReg(getPHIDestReg(PHI), TRI) 1462 << " = PHI("); 1463 1464 bool Replaced = false; 1465 unsigned NumInputs = getPHINumInputs(PHI); 1466 int SingleExternalEntryIndex = -1; 1467 for (unsigned i = 0; i < NumInputs; ++i) { 1468 if (!isPHIRegionIndex(PHIIndices, i)) { 1469 if (SingleExternalEntryIndex == -1) { 1470 // Single entry 1471 SingleExternalEntryIndex = i; 1472 } else { 1473 // Multiple entries 1474 SingleExternalEntryIndex = -2; 1475 } 1476 } 1477 } 1478 1479 if (SingleExternalEntryIndex > -1) { 1480 *ReplaceReg = getPHISourceReg(PHI, SingleExternalEntryIndex); 1481 // We should not rewrite the code, we should only pick up the single value 1482 // that represents the shrunk PHI. 1483 Replaced = true; 1484 } else { 1485 MachineBasicBlock *MBB = PHI.getParent(); 1486 MachineInstrBuilder MIB = 1487 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1488 getPHIDestReg(PHI)); 1489 if (SourceMBB) { 1490 MIB.addReg(CombinedSourceReg); 1491 MIB.addMBB(SourceMBB); 1492 LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", " 1493 << printMBBReference(*SourceMBB)); 1494 } 1495 1496 for (unsigned i = 0; i < NumInputs; ++i) { 1497 if (isPHIRegionIndex(PHIIndices, i)) { 1498 continue; 1499 } 1500 unsigned SourceReg = getPHISourceReg(PHI, i); 1501 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1502 MIB.addReg(SourceReg); 1503 MIB.addMBB(SourcePred); 1504 LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " 1505 << printMBBReference(*SourcePred)); 1506 } 1507 LLVM_DEBUG(dbgs() << ")\n"); 1508 } 1509 PHI.eraseFromParent(); 1510 return Replaced; 1511 } 1512 1513 void AMDGPUMachineCFGStructurizer::replacePHI( 1514 MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *LastMerge, 1515 SmallVector<unsigned, 2> &PHIRegionIndices) { 1516 LLVM_DEBUG(dbgs() << "Replace PHI: "); 1517 LLVM_DEBUG(PHI.dump()); 1518 LLVM_DEBUG(dbgs() << " with " << printReg(getPHIDestReg(PHI), TRI) 1519 << " = PHI("); 1520 1521 bool HasExternalEdge = false; 1522 unsigned NumInputs = getPHINumInputs(PHI); 1523 for (unsigned i = 0; i < NumInputs; ++i) { 1524 if (!isPHIRegionIndex(PHIRegionIndices, i)) { 1525 HasExternalEdge = true; 1526 } 1527 } 1528 1529 if (HasExternalEdge) { 1530 MachineBasicBlock *MBB = PHI.getParent(); 1531 MachineInstrBuilder MIB = 1532 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1533 getPHIDestReg(PHI)); 1534 MIB.addReg(CombinedSourceReg); 1535 MIB.addMBB(LastMerge); 1536 LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", " 1537 << printMBBReference(*LastMerge)); 1538 for (unsigned i = 0; i < NumInputs; ++i) { 1539 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1540 continue; 1541 } 1542 unsigned SourceReg = getPHISourceReg(PHI, i); 1543 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1544 MIB.addReg(SourceReg); 1545 MIB.addMBB(SourcePred); 1546 LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " 1547 << printMBBReference(*SourcePred)); 1548 } 1549 LLVM_DEBUG(dbgs() << ")\n"); 1550 } else { 1551 replaceRegisterWith(getPHIDestReg(PHI), CombinedSourceReg); 1552 } 1553 PHI.eraseFromParent(); 1554 } 1555 1556 void AMDGPUMachineCFGStructurizer::replaceEntryPHI( 1557 MachineInstr &PHI, unsigned CombinedSourceReg, MachineBasicBlock *IfMBB, 1558 SmallVector<unsigned, 2> &PHIRegionIndices) { 1559 LLVM_DEBUG(dbgs() << "Replace entry PHI: "); 1560 LLVM_DEBUG(PHI.dump()); 1561 LLVM_DEBUG(dbgs() << " with "); 1562 1563 unsigned NumInputs = getPHINumInputs(PHI); 1564 unsigned NumNonRegionInputs = NumInputs; 1565 for (unsigned i = 0; i < NumInputs; ++i) { 1566 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1567 NumNonRegionInputs--; 1568 } 1569 } 1570 1571 if (NumNonRegionInputs == 0) { 1572 auto DestReg = getPHIDestReg(PHI); 1573 replaceRegisterWith(DestReg, CombinedSourceReg); 1574 LLVM_DEBUG(dbgs() << " register " << printReg(CombinedSourceReg, TRI) 1575 << "\n"); 1576 PHI.eraseFromParent(); 1577 } else { 1578 LLVM_DEBUG(dbgs() << printReg(getPHIDestReg(PHI), TRI) << " = PHI("); 1579 MachineBasicBlock *MBB = PHI.getParent(); 1580 MachineInstrBuilder MIB = 1581 BuildMI(*MBB, PHI, PHI.getDebugLoc(), TII->get(TargetOpcode::PHI), 1582 getPHIDestReg(PHI)); 1583 MIB.addReg(CombinedSourceReg); 1584 MIB.addMBB(IfMBB); 1585 LLVM_DEBUG(dbgs() << printReg(CombinedSourceReg, TRI) << ", " 1586 << printMBBReference(*IfMBB)); 1587 unsigned NumInputs = getPHINumInputs(PHI); 1588 for (unsigned i = 0; i < NumInputs; ++i) { 1589 if (isPHIRegionIndex(PHIRegionIndices, i)) { 1590 continue; 1591 } 1592 unsigned SourceReg = getPHISourceReg(PHI, i); 1593 MachineBasicBlock *SourcePred = getPHIPred(PHI, i); 1594 MIB.addReg(SourceReg); 1595 MIB.addMBB(SourcePred); 1596 LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " 1597 << printMBBReference(*SourcePred)); 1598 } 1599 LLVM_DEBUG(dbgs() << ")\n"); 1600 PHI.eraseFromParent(); 1601 } 1602 } 1603 1604 void AMDGPUMachineCFGStructurizer::replaceLiveOutRegs( 1605 MachineInstr &PHI, SmallVector<unsigned, 2> &PHIRegionIndices, 1606 unsigned CombinedSourceReg, LinearizedRegion *LRegion) { 1607 bool WasLiveOut = false; 1608 for (auto PII : PHIRegionIndices) { 1609 unsigned Reg = getPHISourceReg(PHI, PII); 1610 if (LRegion->isLiveOut(Reg)) { 1611 bool IsDead = true; 1612 1613 // Check if register is live out of the basic block 1614 MachineBasicBlock *DefMBB = getDefInstr(Reg)->getParent(); 1615 for (auto UI = MRI->use_begin(Reg), E = MRI->use_end(); UI != E; ++UI) { 1616 if ((*UI).getParent()->getParent() != DefMBB) { 1617 IsDead = false; 1618 } 1619 } 1620 1621 LLVM_DEBUG(dbgs() << "Register " << printReg(Reg, TRI) << " is " 1622 << (IsDead ? "dead" : "alive") 1623 << " after PHI replace\n"); 1624 if (IsDead) { 1625 LRegion->removeLiveOut(Reg); 1626 } 1627 WasLiveOut = true; 1628 } 1629 } 1630 1631 if (WasLiveOut) 1632 LRegion->addLiveOut(CombinedSourceReg); 1633 } 1634 1635 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHI(RegionMRT *Region, 1636 MachineBasicBlock *LastMerge, 1637 MachineInstr &PHI, 1638 LinearizedRegion *LRegion) { 1639 SmallVector<unsigned, 2> PHIRegionIndices; 1640 getPHIRegionIndices(Region, PHI, PHIRegionIndices); 1641 unsigned LinearizedSourceReg = 1642 storePHILinearizationInfo(PHI, &PHIRegionIndices); 1643 1644 replacePHI(PHI, LinearizedSourceReg, LastMerge, PHIRegionIndices); 1645 replaceLiveOutRegs(PHI, PHIRegionIndices, LinearizedSourceReg, LRegion); 1646 } 1647 1648 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHI(LinearizedRegion *Region, 1649 MachineBasicBlock *IfMBB, 1650 MachineInstr &PHI) { 1651 SmallVector<unsigned, 2> PHINonRegionIndices; 1652 getPHINonRegionIndices(Region, PHI, PHINonRegionIndices); 1653 unsigned LinearizedSourceReg = 1654 storePHILinearizationInfo(PHI, &PHINonRegionIndices); 1655 replaceEntryPHI(PHI, LinearizedSourceReg, IfMBB, PHINonRegionIndices); 1656 } 1657 1658 static void collectPHIs(MachineBasicBlock *MBB, 1659 SmallVector<MachineInstr *, 2> &PHIs) { 1660 for (auto &BBI : *MBB) { 1661 if (BBI.isPHI()) { 1662 PHIs.push_back(&BBI); 1663 } 1664 } 1665 } 1666 1667 void AMDGPUMachineCFGStructurizer::rewriteRegionExitPHIs(RegionMRT *Region, 1668 MachineBasicBlock *LastMerge, 1669 LinearizedRegion *LRegion) { 1670 SmallVector<MachineInstr *, 2> PHIs; 1671 auto Exit = Region->getSucc(); 1672 if (Exit == nullptr) 1673 return; 1674 1675 collectPHIs(Exit, PHIs); 1676 1677 for (auto PHII : PHIs) { 1678 rewriteRegionExitPHI(Region, LastMerge, *PHII, LRegion); 1679 } 1680 } 1681 1682 void AMDGPUMachineCFGStructurizer::rewriteRegionEntryPHIs(LinearizedRegion *Region, 1683 MachineBasicBlock *IfMBB) { 1684 SmallVector<MachineInstr *, 2> PHIs; 1685 auto Entry = Region->getEntry(); 1686 1687 collectPHIs(Entry, PHIs); 1688 1689 for (auto PHII : PHIs) { 1690 rewriteRegionEntryPHI(Region, IfMBB, *PHII); 1691 } 1692 } 1693 1694 void AMDGPUMachineCFGStructurizer::insertUnconditionalBranch(MachineBasicBlock *MBB, 1695 MachineBasicBlock *Dest, 1696 const DebugLoc &DL) { 1697 LLVM_DEBUG(dbgs() << "Inserting unconditional branch: " << MBB->getNumber() 1698 << " -> " << Dest->getNumber() << "\n"); 1699 MachineBasicBlock::instr_iterator Terminator = MBB->getFirstInstrTerminator(); 1700 bool HasTerminator = Terminator != MBB->instr_end(); 1701 if (HasTerminator) { 1702 TII->ReplaceTailWithBranchTo(Terminator, Dest); 1703 } 1704 if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(Dest)) { 1705 TII->insertUnconditionalBranch(*MBB, Dest, DL); 1706 } 1707 } 1708 1709 static MachineBasicBlock *getSingleExitNode(MachineFunction &MF) { 1710 MachineBasicBlock *result = nullptr; 1711 for (auto &MFI : MF) { 1712 if (MFI.succ_size() == 0) { 1713 if (result == nullptr) { 1714 result = &MFI; 1715 } else { 1716 return nullptr; 1717 } 1718 } 1719 } 1720 1721 return result; 1722 } 1723 1724 static bool hasOneExitNode(MachineFunction &MF) { 1725 return getSingleExitNode(MF) != nullptr; 1726 } 1727 1728 MachineBasicBlock * 1729 AMDGPUMachineCFGStructurizer::createLinearizedExitBlock(RegionMRT *Region) { 1730 auto Exit = Region->getSucc(); 1731 1732 // If the exit is the end of the function, we just use the existing 1733 MachineFunction *MF = Region->getEntry()->getParent(); 1734 if (Exit == nullptr && hasOneExitNode(*MF)) { 1735 return &(*(--(Region->getEntry()->getParent()->end()))); 1736 } 1737 1738 MachineBasicBlock *LastMerge = MF->CreateMachineBasicBlock(); 1739 if (Exit == nullptr) { 1740 MachineFunction::iterator ExitIter = MF->end(); 1741 MF->insert(ExitIter, LastMerge); 1742 } else { 1743 MachineFunction::iterator ExitIter = Exit->getIterator(); 1744 MF->insert(ExitIter, LastMerge); 1745 LastMerge->addSuccessor(Exit); 1746 insertUnconditionalBranch(LastMerge, Exit); 1747 LLVM_DEBUG(dbgs() << "Created exit block: " << LastMerge->getNumber() 1748 << "\n"); 1749 } 1750 return LastMerge; 1751 } 1752 1753 void AMDGPUMachineCFGStructurizer::insertMergePHI(MachineBasicBlock *IfBB, 1754 MachineBasicBlock *CodeBB, 1755 MachineBasicBlock *MergeBB, 1756 unsigned DestRegister, 1757 unsigned IfSourceRegister, 1758 unsigned CodeSourceRegister, 1759 bool IsUndefIfSource) { 1760 // If this is the function exit block, we don't need a phi. 1761 if (MergeBB->succ_begin() == MergeBB->succ_end()) { 1762 return; 1763 } 1764 LLVM_DEBUG(dbgs() << "Merge PHI (" << printMBBReference(*MergeBB) 1765 << "): " << printReg(DestRegister, TRI) << " = PHI(" 1766 << printReg(IfSourceRegister, TRI) << ", " 1767 << printMBBReference(*IfBB) 1768 << printReg(CodeSourceRegister, TRI) << ", " 1769 << printMBBReference(*CodeBB) << ")\n"); 1770 const DebugLoc &DL = MergeBB->findDebugLoc(MergeBB->begin()); 1771 MachineInstrBuilder MIB = BuildMI(*MergeBB, MergeBB->instr_begin(), DL, 1772 TII->get(TargetOpcode::PHI), DestRegister); 1773 if (IsUndefIfSource && false) { 1774 MIB.addReg(IfSourceRegister, RegState::Undef); 1775 } else { 1776 MIB.addReg(IfSourceRegister); 1777 } 1778 MIB.addMBB(IfBB); 1779 MIB.addReg(CodeSourceRegister); 1780 MIB.addMBB(CodeBB); 1781 } 1782 1783 static void removeExternalCFGSuccessors(MachineBasicBlock *MBB) { 1784 for (MachineBasicBlock::succ_iterator PI = MBB->succ_begin(), 1785 E = MBB->succ_end(); 1786 PI != E; ++PI) { 1787 if ((*PI) != MBB) { 1788 (MBB)->removeSuccessor(*PI); 1789 } 1790 } 1791 } 1792 1793 static void removeExternalCFGEdges(MachineBasicBlock *StartMBB, 1794 MachineBasicBlock *EndMBB) { 1795 1796 // We have to check against the StartMBB successor becasuse a 1797 // structurized region with a loop will have the entry block split, 1798 // and the backedge will go to the entry successor. 1799 DenseSet<std::pair<MachineBasicBlock *, MachineBasicBlock *>> Succs; 1800 unsigned SuccSize = StartMBB->succ_size(); 1801 if (SuccSize > 0) { 1802 MachineBasicBlock *StartMBBSucc = *(StartMBB->succ_begin()); 1803 for (MachineBasicBlock::succ_iterator PI = EndMBB->succ_begin(), 1804 E = EndMBB->succ_end(); 1805 PI != E; ++PI) { 1806 // Either we have a back-edge to the entry block, or a back-edge to the 1807 // successor of the entry block since the block may be split. 1808 if ((*PI) != StartMBB && 1809 !((*PI) == StartMBBSucc && StartMBB != EndMBB && SuccSize == 1)) { 1810 Succs.insert( 1811 std::pair<MachineBasicBlock *, MachineBasicBlock *>(EndMBB, *PI)); 1812 } 1813 } 1814 } 1815 1816 for (MachineBasicBlock::pred_iterator PI = StartMBB->pred_begin(), 1817 E = StartMBB->pred_end(); 1818 PI != E; ++PI) { 1819 if ((*PI) != EndMBB) { 1820 Succs.insert( 1821 std::pair<MachineBasicBlock *, MachineBasicBlock *>(*PI, StartMBB)); 1822 } 1823 } 1824 1825 for (auto SI : Succs) { 1826 std::pair<MachineBasicBlock *, MachineBasicBlock *> Edge = SI; 1827 LLVM_DEBUG(dbgs() << "Removing edge: " << printMBBReference(*Edge.first) 1828 << " -> " << printMBBReference(*Edge.second) << "\n"); 1829 Edge.first->removeSuccessor(Edge.second); 1830 } 1831 } 1832 1833 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfBlock( 1834 MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBBStart, 1835 MachineBasicBlock *CodeBBEnd, MachineBasicBlock *SelectBB, unsigned IfReg, 1836 bool InheritPreds) { 1837 MachineFunction *MF = MergeBB->getParent(); 1838 MachineBasicBlock *IfBB = MF->CreateMachineBasicBlock(); 1839 1840 if (InheritPreds) { 1841 for (MachineBasicBlock::pred_iterator PI = CodeBBStart->pred_begin(), 1842 E = CodeBBStart->pred_end(); 1843 PI != E; ++PI) { 1844 if ((*PI) != CodeBBEnd) { 1845 MachineBasicBlock *Pred = (*PI); 1846 Pred->addSuccessor(IfBB); 1847 } 1848 } 1849 } 1850 1851 removeExternalCFGEdges(CodeBBStart, CodeBBEnd); 1852 1853 auto CodeBBStartI = CodeBBStart->getIterator(); 1854 auto CodeBBEndI = CodeBBEnd->getIterator(); 1855 auto MergeIter = MergeBB->getIterator(); 1856 MF->insert(MergeIter, IfBB); 1857 MF->splice(MergeIter, CodeBBStartI, ++CodeBBEndI); 1858 IfBB->addSuccessor(MergeBB); 1859 IfBB->addSuccessor(CodeBBStart); 1860 1861 LLVM_DEBUG(dbgs() << "Created If block: " << IfBB->getNumber() << "\n"); 1862 // Ensure that the MergeBB is a successor of the CodeEndBB. 1863 if (!CodeBBEnd->isSuccessor(MergeBB)) 1864 CodeBBEnd->addSuccessor(MergeBB); 1865 1866 LLVM_DEBUG(dbgs() << "Moved " << printMBBReference(*CodeBBStart) 1867 << " through " << printMBBReference(*CodeBBEnd) << "\n"); 1868 1869 // If we have a single predecessor we can find a reasonable debug location 1870 MachineBasicBlock *SinglePred = 1871 CodeBBStart->pred_size() == 1 ? *(CodeBBStart->pred_begin()) : nullptr; 1872 const DebugLoc &DL = SinglePred 1873 ? SinglePred->findDebugLoc(SinglePred->getFirstTerminator()) 1874 : DebugLoc(); 1875 1876 Register Reg = 1877 TII->insertEQ(IfBB, IfBB->begin(), DL, IfReg, 1878 SelectBB->getNumber() /* CodeBBStart->getNumber() */); 1879 if (&(*(IfBB->getParent()->begin())) == IfBB) { 1880 TII->materializeImmediate(*IfBB, IfBB->begin(), DL, IfReg, 1881 CodeBBStart->getNumber()); 1882 } 1883 MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); 1884 ArrayRef<MachineOperand> Cond(RegOp); 1885 TII->insertBranch(*IfBB, MergeBB, CodeBBStart, Cond, DL); 1886 1887 return IfBB; 1888 } 1889 1890 void AMDGPUMachineCFGStructurizer::ensureCondIsNotKilled( 1891 SmallVector<MachineOperand, 1> Cond) { 1892 if (Cond.size() != 1) 1893 return; 1894 if (!Cond[0].isReg()) 1895 return; 1896 1897 Register CondReg = Cond[0].getReg(); 1898 for (auto UI = MRI->use_begin(CondReg), E = MRI->use_end(); UI != E; ++UI) { 1899 (*UI).setIsKill(false); 1900 } 1901 } 1902 1903 void AMDGPUMachineCFGStructurizer::rewriteCodeBBTerminator(MachineBasicBlock *CodeBB, 1904 MachineBasicBlock *MergeBB, 1905 unsigned BBSelectReg) { 1906 MachineBasicBlock *TrueBB = nullptr; 1907 MachineBasicBlock *FalseBB = nullptr; 1908 SmallVector<MachineOperand, 1> Cond; 1909 MachineBasicBlock *FallthroughBB = FallthroughMap[CodeBB]; 1910 TII->analyzeBranch(*CodeBB, TrueBB, FalseBB, Cond); 1911 1912 const DebugLoc &DL = CodeBB->findDebugLoc(CodeBB->getFirstTerminator()); 1913 1914 if (FalseBB == nullptr && TrueBB == nullptr && FallthroughBB == nullptr) { 1915 // This is an exit block, hence no successors. We will assign the 1916 // bb select register to the entry block. 1917 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1918 BBSelectReg, 1919 CodeBB->getParent()->begin()->getNumber()); 1920 insertUnconditionalBranch(CodeBB, MergeBB, DL); 1921 return; 1922 } 1923 1924 if (FalseBB == nullptr && TrueBB == nullptr) { 1925 TrueBB = FallthroughBB; 1926 } else if (TrueBB != nullptr) { 1927 FalseBB = 1928 (FallthroughBB && (FallthroughBB != TrueBB)) ? FallthroughBB : FalseBB; 1929 } 1930 1931 if ((TrueBB != nullptr && FalseBB == nullptr) || (TrueBB == FalseBB)) { 1932 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1933 BBSelectReg, TrueBB->getNumber()); 1934 } else { 1935 const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectReg); 1936 Register TrueBBReg = MRI->createVirtualRegister(RegClass); 1937 Register FalseBBReg = MRI->createVirtualRegister(RegClass); 1938 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1939 TrueBBReg, TrueBB->getNumber()); 1940 TII->materializeImmediate(*CodeBB, CodeBB->getFirstTerminator(), DL, 1941 FalseBBReg, FalseBB->getNumber()); 1942 ensureCondIsNotKilled(Cond); 1943 TII->insertVectorSelect(*CodeBB, CodeBB->getFirstTerminator(), DL, 1944 BBSelectReg, Cond, TrueBBReg, FalseBBReg); 1945 } 1946 1947 insertUnconditionalBranch(CodeBB, MergeBB, DL); 1948 } 1949 1950 MachineInstr *AMDGPUMachineCFGStructurizer::getDefInstr(unsigned Reg) { 1951 if (MRI->def_begin(Reg) == MRI->def_end()) { 1952 LLVM_DEBUG(dbgs() << "Register " 1953 << printReg(Reg, MRI->getTargetRegisterInfo()) 1954 << " has NO defs\n"); 1955 } else if (!MRI->hasOneDef(Reg)) { 1956 LLVM_DEBUG(dbgs() << "Register " 1957 << printReg(Reg, MRI->getTargetRegisterInfo()) 1958 << " has multiple defs\n"); 1959 LLVM_DEBUG(dbgs() << "DEFS BEGIN:\n"); 1960 for (auto DI = MRI->def_begin(Reg), DE = MRI->def_end(); DI != DE; ++DI) { 1961 LLVM_DEBUG(DI->getParent()->dump()); 1962 } 1963 LLVM_DEBUG(dbgs() << "DEFS END\n"); 1964 } 1965 1966 assert(MRI->hasOneDef(Reg) && "Register has multiple definitions"); 1967 return (*(MRI->def_begin(Reg))).getParent(); 1968 } 1969 1970 void AMDGPUMachineCFGStructurizer::insertChainedPHI(MachineBasicBlock *IfBB, 1971 MachineBasicBlock *CodeBB, 1972 MachineBasicBlock *MergeBB, 1973 LinearizedRegion *InnerRegion, 1974 unsigned DestReg, 1975 unsigned SourceReg) { 1976 // In this function we know we are part of a chain already, so we need 1977 // to add the registers to the existing chain, and rename the register 1978 // inside the region. 1979 bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); 1980 MachineInstr *DefInstr = getDefInstr(SourceReg); 1981 if (DefInstr->isPHI() && DefInstr->getParent() == CodeBB && IsSingleBB) { 1982 // Handle the case where the def is a PHI-def inside a basic 1983 // block, then we only need to do renaming. Special care needs to 1984 // be taken if the PHI-def is part of an existing chain, or if a 1985 // new one needs to be created. 1986 InnerRegion->replaceRegisterInsideRegion(SourceReg, DestReg, true, MRI); 1987 1988 // We collect all PHI Information, and if we are at the region entry, 1989 // all PHIs will be removed, and then re-introduced if needed. 1990 storePHILinearizationInfoDest(DestReg, *DefInstr); 1991 // We have picked up all the information we need now and can remove 1992 // the PHI 1993 PHIInfo.removeSource(DestReg, SourceReg, CodeBB); 1994 DefInstr->eraseFromParent(); 1995 } else { 1996 // If this is not a phi-def, or it is a phi-def but from a linearized region 1997 if (IsSingleBB && DefInstr->getParent() == InnerRegion->getEntry()) { 1998 // If this is a single BB and the definition is in this block we 1999 // need to replace any uses outside the region. 2000 InnerRegion->replaceRegisterOutsideRegion(SourceReg, DestReg, false, MRI); 2001 } 2002 const TargetRegisterClass *RegClass = MRI->getRegClass(DestReg); 2003 Register NextDestReg = MRI->createVirtualRegister(RegClass); 2004 bool IsLastDef = PHIInfo.getNumSources(DestReg) == 1; 2005 LLVM_DEBUG(dbgs() << "Insert Chained PHI\n"); 2006 insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, DestReg, NextDestReg, 2007 SourceReg, IsLastDef); 2008 2009 PHIInfo.removeSource(DestReg, SourceReg, CodeBB); 2010 if (IsLastDef) { 2011 const DebugLoc &DL = IfBB->findDebugLoc(IfBB->getFirstTerminator()); 2012 TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DL, 2013 NextDestReg, 0); 2014 PHIInfo.deleteDef(DestReg); 2015 } else { 2016 PHIInfo.replaceDef(DestReg, NextDestReg); 2017 } 2018 } 2019 } 2020 2021 bool AMDGPUMachineCFGStructurizer::containsDef(MachineBasicBlock *MBB, 2022 LinearizedRegion *InnerRegion, 2023 unsigned Register) { 2024 return getDefInstr(Register)->getParent() == MBB || 2025 InnerRegion->contains(getDefInstr(Register)->getParent()); 2026 } 2027 2028 void AMDGPUMachineCFGStructurizer::rewriteLiveOutRegs(MachineBasicBlock *IfBB, 2029 MachineBasicBlock *CodeBB, 2030 MachineBasicBlock *MergeBB, 2031 LinearizedRegion *InnerRegion, 2032 LinearizedRegion *LRegion) { 2033 DenseSet<unsigned> *LiveOuts = InnerRegion->getLiveOuts(); 2034 SmallVector<unsigned, 4> OldLiveOuts; 2035 bool IsSingleBB = InnerRegion->getEntry() == InnerRegion->getExit(); 2036 for (auto OLI : *LiveOuts) { 2037 OldLiveOuts.push_back(OLI); 2038 } 2039 2040 for (auto LI : OldLiveOuts) { 2041 LLVM_DEBUG(dbgs() << "LiveOut: " << printReg(LI, TRI)); 2042 if (!containsDef(CodeBB, InnerRegion, LI) || 2043 (!IsSingleBB && (getDefInstr(LI)->getParent() == LRegion->getExit()))) { 2044 // If the register simly lives through the CodeBB, we don't have 2045 // to rewrite anything since the register is not defined in this 2046 // part of the code. 2047 LLVM_DEBUG(dbgs() << "- through"); 2048 continue; 2049 } 2050 LLVM_DEBUG(dbgs() << "\n"); 2051 unsigned Reg = LI; 2052 if (/*!PHIInfo.isSource(Reg) &&*/ Reg != InnerRegion->getBBSelectRegOut()) { 2053 // If the register is live out, we do want to create a phi, 2054 // unless it is from the Exit block, becasuse in that case there 2055 // is already a PHI, and no need to create a new one. 2056 2057 // If the register is just a live out def and not part of a phi 2058 // chain, we need to create a PHI node to handle the if region, 2059 // and replace all uses outside of the region with the new dest 2060 // register, unless it is the outgoing BB select register. We have 2061 // already creaed phi nodes for these. 2062 const TargetRegisterClass *RegClass = MRI->getRegClass(Reg); 2063 Register PHIDestReg = MRI->createVirtualRegister(RegClass); 2064 Register IfSourceReg = MRI->createVirtualRegister(RegClass); 2065 // Create initializer, this value is never used, but is needed 2066 // to satisfy SSA. 2067 LLVM_DEBUG(dbgs() << "Initializer for reg: " << printReg(Reg) << "\n"); 2068 TII->materializeImmediate(*IfBB, IfBB->getFirstTerminator(), DebugLoc(), 2069 IfSourceReg, 0); 2070 2071 InnerRegion->replaceRegisterOutsideRegion(Reg, PHIDestReg, true, MRI); 2072 LLVM_DEBUG(dbgs() << "Insert Non-Chained Live out PHI\n"); 2073 insertMergePHI(IfBB, InnerRegion->getExit(), MergeBB, PHIDestReg, 2074 IfSourceReg, Reg, true); 2075 } 2076 } 2077 2078 // Handle the chained definitions in PHIInfo, checking if this basic block 2079 // is a source block for a definition. 2080 SmallVector<unsigned, 4> Sources; 2081 if (PHIInfo.findSourcesFromMBB(CodeBB, Sources)) { 2082 LLVM_DEBUG(dbgs() << "Inserting PHI Live Out from " 2083 << printMBBReference(*CodeBB) << "\n"); 2084 for (auto SI : Sources) { 2085 unsigned DestReg; 2086 PHIInfo.findDest(SI, CodeBB, DestReg); 2087 insertChainedPHI(IfBB, CodeBB, MergeBB, InnerRegion, DestReg, SI); 2088 } 2089 LLVM_DEBUG(dbgs() << "Insertion done.\n"); 2090 } 2091 2092 LLVM_DEBUG(PHIInfo.dump(MRI)); 2093 } 2094 2095 void AMDGPUMachineCFGStructurizer::prunePHIInfo(MachineBasicBlock *MBB) { 2096 LLVM_DEBUG(dbgs() << "Before PHI Prune\n"); 2097 LLVM_DEBUG(PHIInfo.dump(MRI)); 2098 SmallVector<std::tuple<unsigned, unsigned, MachineBasicBlock *>, 4> 2099 ElimiatedSources; 2100 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2101 ++DRI) { 2102 2103 unsigned DestReg = *DRI; 2104 auto SE = PHIInfo.sources_end(DestReg); 2105 2106 bool MBBContainsPHISource = false; 2107 // Check if there is a PHI source in this MBB 2108 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2109 unsigned SourceReg = (*SRI).first; 2110 MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); 2111 if (Def->getParent()->getParent() == MBB) { 2112 MBBContainsPHISource = true; 2113 } 2114 } 2115 2116 // If so, all other sources are useless since we know this block 2117 // is always executed when the region is executed. 2118 if (MBBContainsPHISource) { 2119 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2120 PHILinearize::PHISourceT Source = *SRI; 2121 unsigned SourceReg = Source.first; 2122 MachineBasicBlock *SourceMBB = Source.second; 2123 MachineOperand *Def = &(*(MRI->def_begin(SourceReg))); 2124 if (Def->getParent()->getParent() != MBB) { 2125 ElimiatedSources.push_back( 2126 std::make_tuple(DestReg, SourceReg, SourceMBB)); 2127 } 2128 } 2129 } 2130 } 2131 2132 // Remove the PHI sources that are in the given MBB 2133 for (auto &SourceInfo : ElimiatedSources) { 2134 PHIInfo.removeSource(std::get<0>(SourceInfo), std::get<1>(SourceInfo), 2135 std::get<2>(SourceInfo)); 2136 } 2137 LLVM_DEBUG(dbgs() << "After PHI Prune\n"); 2138 LLVM_DEBUG(PHIInfo.dump(MRI)); 2139 } 2140 2141 void AMDGPUMachineCFGStructurizer::createEntryPHI(LinearizedRegion *CurrentRegion, 2142 unsigned DestReg) { 2143 MachineBasicBlock *Entry = CurrentRegion->getEntry(); 2144 MachineBasicBlock *Exit = CurrentRegion->getExit(); 2145 2146 LLVM_DEBUG(dbgs() << "RegionExit: " << Exit->getNumber() << " Pred: " 2147 << (*(Entry->pred_begin()))->getNumber() << "\n"); 2148 2149 int NumSources = 0; 2150 auto SE = PHIInfo.sources_end(DestReg); 2151 2152 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2153 NumSources++; 2154 } 2155 2156 if (NumSources == 1) { 2157 auto SRI = PHIInfo.sources_begin(DestReg); 2158 unsigned SourceReg = (*SRI).first; 2159 replaceRegisterWith(DestReg, SourceReg); 2160 } else { 2161 const DebugLoc &DL = Entry->findDebugLoc(Entry->begin()); 2162 MachineInstrBuilder MIB = BuildMI(*Entry, Entry->instr_begin(), DL, 2163 TII->get(TargetOpcode::PHI), DestReg); 2164 LLVM_DEBUG(dbgs() << "Entry PHI " << printReg(DestReg, TRI) << " = PHI("); 2165 2166 unsigned CurrentBackedgeReg = 0; 2167 2168 for (auto SRI = PHIInfo.sources_begin(DestReg); SRI != SE; ++SRI) { 2169 unsigned SourceReg = (*SRI).first; 2170 2171 if (CurrentRegion->contains((*SRI).second)) { 2172 if (CurrentBackedgeReg == 0) { 2173 CurrentBackedgeReg = SourceReg; 2174 } else { 2175 MachineInstr *PHIDefInstr = getDefInstr(SourceReg); 2176 MachineBasicBlock *PHIDefMBB = PHIDefInstr->getParent(); 2177 const TargetRegisterClass *RegClass = 2178 MRI->getRegClass(CurrentBackedgeReg); 2179 Register NewBackedgeReg = MRI->createVirtualRegister(RegClass); 2180 MachineInstrBuilder BackedgePHI = 2181 BuildMI(*PHIDefMBB, PHIDefMBB->instr_begin(), DL, 2182 TII->get(TargetOpcode::PHI), NewBackedgeReg); 2183 BackedgePHI.addReg(CurrentBackedgeReg); 2184 BackedgePHI.addMBB(getPHIPred(*PHIDefInstr, 0)); 2185 BackedgePHI.addReg(getPHISourceReg(*PHIDefInstr, 1)); 2186 BackedgePHI.addMBB((*SRI).second); 2187 CurrentBackedgeReg = NewBackedgeReg; 2188 LLVM_DEBUG(dbgs() 2189 << "Inserting backedge PHI: " 2190 << printReg(NewBackedgeReg, TRI) << " = PHI(" 2191 << printReg(CurrentBackedgeReg, TRI) << ", " 2192 << printMBBReference(*getPHIPred(*PHIDefInstr, 0)) << ", " 2193 << printReg(getPHISourceReg(*PHIDefInstr, 1), TRI) << ", " 2194 << printMBBReference(*(*SRI).second)); 2195 } 2196 } else { 2197 MIB.addReg(SourceReg); 2198 MIB.addMBB((*SRI).second); 2199 LLVM_DEBUG(dbgs() << printReg(SourceReg, TRI) << ", " 2200 << printMBBReference(*(*SRI).second) << ", "); 2201 } 2202 } 2203 2204 // Add the final backedge register source to the entry phi 2205 if (CurrentBackedgeReg != 0) { 2206 MIB.addReg(CurrentBackedgeReg); 2207 MIB.addMBB(Exit); 2208 LLVM_DEBUG(dbgs() << printReg(CurrentBackedgeReg, TRI) << ", " 2209 << printMBBReference(*Exit) << ")\n"); 2210 } else { 2211 LLVM_DEBUG(dbgs() << ")\n"); 2212 } 2213 } 2214 } 2215 2216 void AMDGPUMachineCFGStructurizer::createEntryPHIs(LinearizedRegion *CurrentRegion) { 2217 LLVM_DEBUG(PHIInfo.dump(MRI)); 2218 2219 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2220 ++DRI) { 2221 2222 unsigned DestReg = *DRI; 2223 createEntryPHI(CurrentRegion, DestReg); 2224 } 2225 PHIInfo.clear(); 2226 } 2227 2228 void AMDGPUMachineCFGStructurizer::replaceRegisterWith( 2229 unsigned Register, class Register NewRegister) { 2230 assert(Register != NewRegister && "Cannot replace a reg with itself"); 2231 2232 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(Register), 2233 E = MRI->reg_end(); 2234 I != E;) { 2235 MachineOperand &O = *I; 2236 ++I; 2237 if (NewRegister.isPhysical()) { 2238 LLVM_DEBUG(dbgs() << "Trying to substitute physical register: " 2239 << printReg(NewRegister, MRI->getTargetRegisterInfo()) 2240 << "\n"); 2241 llvm_unreachable("Cannot substitute physical registers"); 2242 // We don't handle physical registers, but if we need to 2243 // in the future This is how we do it: 2244 // O.substPhysReg(NewRegister, *TRI); 2245 } else { 2246 LLVM_DEBUG(dbgs() << "Replacing register: " 2247 << printReg(Register, MRI->getTargetRegisterInfo()) 2248 << " with " 2249 << printReg(NewRegister, MRI->getTargetRegisterInfo()) 2250 << "\n"); 2251 O.setReg(NewRegister); 2252 } 2253 } 2254 PHIInfo.deleteDef(Register); 2255 2256 getRegionMRT()->replaceLiveOutReg(Register, NewRegister); 2257 2258 LLVM_DEBUG(PHIInfo.dump(MRI)); 2259 } 2260 2261 void AMDGPUMachineCFGStructurizer::resolvePHIInfos(MachineBasicBlock *FunctionEntry) { 2262 LLVM_DEBUG(dbgs() << "Resolve PHI Infos\n"); 2263 LLVM_DEBUG(PHIInfo.dump(MRI)); 2264 for (auto DRI = PHIInfo.dests_begin(), DE = PHIInfo.dests_end(); DRI != DE; 2265 ++DRI) { 2266 unsigned DestReg = *DRI; 2267 LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI) << "\n"); 2268 auto SRI = PHIInfo.sources_begin(DestReg); 2269 unsigned SourceReg = (*SRI).first; 2270 LLVM_DEBUG(dbgs() << "DestReg: " << printReg(DestReg, TRI) 2271 << " SourceReg: " << printReg(SourceReg, TRI) << "\n"); 2272 2273 assert(PHIInfo.sources_end(DestReg) == ++SRI && 2274 "More than one phi source in entry node"); 2275 replaceRegisterWith(DestReg, SourceReg); 2276 } 2277 } 2278 2279 static bool isFunctionEntryBlock(MachineBasicBlock *MBB) { 2280 return ((&(*(MBB->getParent()->begin()))) == MBB); 2281 } 2282 2283 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( 2284 MachineBasicBlock *MergeBB, MachineBasicBlock *CodeBB, 2285 LinearizedRegion *CurrentRegion, unsigned BBSelectRegIn, 2286 unsigned BBSelectRegOut) { 2287 if (isFunctionEntryBlock(CodeBB) && !CurrentRegion->getHasLoop()) { 2288 // Handle non-loop function entry block. 2289 // We need to allow loops to the entry block and then 2290 rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); 2291 resolvePHIInfos(CodeBB); 2292 removeExternalCFGSuccessors(CodeBB); 2293 CodeBB->addSuccessor(MergeBB); 2294 CurrentRegion->addMBB(CodeBB); 2295 return nullptr; 2296 } 2297 if (CurrentRegion->getEntry() == CodeBB && !CurrentRegion->getHasLoop()) { 2298 // Handle non-loop region entry block. 2299 MachineFunction *MF = MergeBB->getParent(); 2300 auto MergeIter = MergeBB->getIterator(); 2301 auto CodeBBStartIter = CodeBB->getIterator(); 2302 auto CodeBBEndIter = ++(CodeBB->getIterator()); 2303 if (CodeBBEndIter != MergeIter) { 2304 MF->splice(MergeIter, CodeBBStartIter, CodeBBEndIter); 2305 } 2306 rewriteCodeBBTerminator(CodeBB, MergeBB, BBSelectRegOut); 2307 prunePHIInfo(CodeBB); 2308 createEntryPHIs(CurrentRegion); 2309 removeExternalCFGSuccessors(CodeBB); 2310 CodeBB->addSuccessor(MergeBB); 2311 CurrentRegion->addMBB(CodeBB); 2312 return nullptr; 2313 } else { 2314 // Handle internal block. 2315 const TargetRegisterClass *RegClass = MRI->getRegClass(BBSelectRegIn); 2316 Register CodeBBSelectReg = MRI->createVirtualRegister(RegClass); 2317 rewriteCodeBBTerminator(CodeBB, MergeBB, CodeBBSelectReg); 2318 bool IsRegionEntryBB = CurrentRegion->getEntry() == CodeBB; 2319 MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeBB, CodeBB, CodeBB, 2320 BBSelectRegIn, IsRegionEntryBB); 2321 CurrentRegion->addMBB(IfBB); 2322 // If this is the entry block we need to make the If block the new 2323 // linearized region entry. 2324 if (IsRegionEntryBB) { 2325 CurrentRegion->setEntry(IfBB); 2326 2327 if (CurrentRegion->getHasLoop()) { 2328 MachineBasicBlock *RegionExit = CurrentRegion->getExit(); 2329 MachineBasicBlock *ETrueBB = nullptr; 2330 MachineBasicBlock *EFalseBB = nullptr; 2331 SmallVector<MachineOperand, 1> ECond; 2332 2333 const DebugLoc &DL = DebugLoc(); 2334 TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); 2335 TII->removeBranch(*RegionExit); 2336 2337 // We need to create a backedge if there is a loop 2338 Register Reg = TII->insertNE( 2339 RegionExit, RegionExit->instr_end(), DL, 2340 CurrentRegion->getRegionMRT()->getInnerOutputRegister(), 2341 CurrentRegion->getRegionMRT()->getEntry()->getNumber()); 2342 MachineOperand RegOp = 2343 MachineOperand::CreateReg(Reg, false, false, true); 2344 ArrayRef<MachineOperand> Cond(RegOp); 2345 LLVM_DEBUG(dbgs() << "RegionExitReg: "); 2346 LLVM_DEBUG(Cond[0].print(dbgs(), TRI)); 2347 LLVM_DEBUG(dbgs() << "\n"); 2348 TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, 2349 Cond, DebugLoc()); 2350 RegionExit->addSuccessor(CurrentRegion->getEntry()); 2351 } 2352 } 2353 CurrentRegion->addMBB(CodeBB); 2354 LinearizedRegion InnerRegion(CodeBB, MRI, TRI, PHIInfo); 2355 2356 InnerRegion.setParent(CurrentRegion); 2357 LLVM_DEBUG(dbgs() << "Insert BB Select PHI (BB)\n"); 2358 insertMergePHI(IfBB, CodeBB, MergeBB, BBSelectRegOut, BBSelectRegIn, 2359 CodeBBSelectReg); 2360 InnerRegion.addMBB(MergeBB); 2361 2362 LLVM_DEBUG(InnerRegion.print(dbgs(), TRI)); 2363 rewriteLiveOutRegs(IfBB, CodeBB, MergeBB, &InnerRegion, CurrentRegion); 2364 extractKilledPHIs(CodeBB); 2365 if (IsRegionEntryBB) { 2366 createEntryPHIs(CurrentRegion); 2367 } 2368 return IfBB; 2369 } 2370 } 2371 2372 MachineBasicBlock *AMDGPUMachineCFGStructurizer::createIfRegion( 2373 MachineBasicBlock *MergeBB, LinearizedRegion *InnerRegion, 2374 LinearizedRegion *CurrentRegion, MachineBasicBlock *SelectBB, 2375 unsigned BBSelectRegIn, unsigned BBSelectRegOut) { 2376 unsigned CodeBBSelectReg = 2377 InnerRegion->getRegionMRT()->getInnerOutputRegister(); 2378 MachineBasicBlock *CodeEntryBB = InnerRegion->getEntry(); 2379 MachineBasicBlock *CodeExitBB = InnerRegion->getExit(); 2380 MachineBasicBlock *IfBB = createIfBlock(MergeBB, CodeEntryBB, CodeExitBB, 2381 SelectBB, BBSelectRegIn, true); 2382 CurrentRegion->addMBB(IfBB); 2383 bool isEntry = CurrentRegion->getEntry() == InnerRegion->getEntry(); 2384 if (isEntry) { 2385 2386 if (CurrentRegion->getHasLoop()) { 2387 MachineBasicBlock *RegionExit = CurrentRegion->getExit(); 2388 MachineBasicBlock *ETrueBB = nullptr; 2389 MachineBasicBlock *EFalseBB = nullptr; 2390 SmallVector<MachineOperand, 1> ECond; 2391 2392 const DebugLoc &DL = DebugLoc(); 2393 TII->analyzeBranch(*RegionExit, ETrueBB, EFalseBB, ECond); 2394 TII->removeBranch(*RegionExit); 2395 2396 // We need to create a backedge if there is a loop 2397 Register Reg = 2398 TII->insertNE(RegionExit, RegionExit->instr_end(), DL, 2399 CurrentRegion->getRegionMRT()->getInnerOutputRegister(), 2400 CurrentRegion->getRegionMRT()->getEntry()->getNumber()); 2401 MachineOperand RegOp = MachineOperand::CreateReg(Reg, false, false, true); 2402 ArrayRef<MachineOperand> Cond(RegOp); 2403 LLVM_DEBUG(dbgs() << "RegionExitReg: "); 2404 LLVM_DEBUG(Cond[0].print(dbgs(), TRI)); 2405 LLVM_DEBUG(dbgs() << "\n"); 2406 TII->insertBranch(*RegionExit, CurrentRegion->getEntry(), RegionExit, 2407 Cond, DebugLoc()); 2408 RegionExit->addSuccessor(IfBB); 2409 } 2410 } 2411 CurrentRegion->addMBBs(InnerRegion); 2412 LLVM_DEBUG(dbgs() << "Insert BB Select PHI (region)\n"); 2413 insertMergePHI(IfBB, CodeExitBB, MergeBB, BBSelectRegOut, BBSelectRegIn, 2414 CodeBBSelectReg); 2415 2416 rewriteLiveOutRegs(IfBB, /* CodeEntryBB */ CodeExitBB, MergeBB, InnerRegion, 2417 CurrentRegion); 2418 2419 rewriteRegionEntryPHIs(InnerRegion, IfBB); 2420 2421 if (isEntry) { 2422 CurrentRegion->setEntry(IfBB); 2423 } 2424 2425 if (isEntry) { 2426 createEntryPHIs(CurrentRegion); 2427 } 2428 2429 return IfBB; 2430 } 2431 2432 void AMDGPUMachineCFGStructurizer::splitLoopPHI(MachineInstr &PHI, 2433 MachineBasicBlock *Entry, 2434 MachineBasicBlock *EntrySucc, 2435 LinearizedRegion *LRegion) { 2436 SmallVector<unsigned, 2> PHIRegionIndices; 2437 getPHIRegionIndices(LRegion, PHI, PHIRegionIndices); 2438 2439 assert(PHIRegionIndices.size() == 1); 2440 2441 unsigned RegionIndex = PHIRegionIndices[0]; 2442 unsigned RegionSourceReg = getPHISourceReg(PHI, RegionIndex); 2443 MachineBasicBlock *RegionSourceMBB = getPHIPred(PHI, RegionIndex); 2444 unsigned PHIDest = getPHIDestReg(PHI); 2445 unsigned PHISource = PHIDest; 2446 unsigned ReplaceReg; 2447 2448 if (shrinkPHI(PHI, PHIRegionIndices, &ReplaceReg)) { 2449 PHISource = ReplaceReg; 2450 } 2451 2452 const TargetRegisterClass *RegClass = MRI->getRegClass(PHIDest); 2453 Register NewDestReg = MRI->createVirtualRegister(RegClass); 2454 LRegion->replaceRegisterInsideRegion(PHIDest, NewDestReg, false, MRI); 2455 MachineInstrBuilder MIB = 2456 BuildMI(*EntrySucc, EntrySucc->instr_begin(), PHI.getDebugLoc(), 2457 TII->get(TargetOpcode::PHI), NewDestReg); 2458 LLVM_DEBUG(dbgs() << "Split Entry PHI " << printReg(NewDestReg, TRI) 2459 << " = PHI("); 2460 MIB.addReg(PHISource); 2461 MIB.addMBB(Entry); 2462 LLVM_DEBUG(dbgs() << printReg(PHISource, TRI) << ", " 2463 << printMBBReference(*Entry)); 2464 MIB.addReg(RegionSourceReg); 2465 MIB.addMBB(RegionSourceMBB); 2466 LLVM_DEBUG(dbgs() << " ," << printReg(RegionSourceReg, TRI) << ", " 2467 << printMBBReference(*RegionSourceMBB) << ")\n"); 2468 } 2469 2470 void AMDGPUMachineCFGStructurizer::splitLoopPHIs(MachineBasicBlock *Entry, 2471 MachineBasicBlock *EntrySucc, 2472 LinearizedRegion *LRegion) { 2473 SmallVector<MachineInstr *, 2> PHIs; 2474 collectPHIs(Entry, PHIs); 2475 2476 for (auto PHII : PHIs) { 2477 splitLoopPHI(*PHII, Entry, EntrySucc, LRegion); 2478 } 2479 } 2480 2481 // Split the exit block so that we can insert a end control flow 2482 MachineBasicBlock * 2483 AMDGPUMachineCFGStructurizer::splitExit(LinearizedRegion *LRegion) { 2484 auto MRTRegion = LRegion->getRegionMRT(); 2485 auto Exit = LRegion->getExit(); 2486 auto MF = Exit->getParent(); 2487 auto Succ = MRTRegion->getSucc(); 2488 2489 auto NewExit = MF->CreateMachineBasicBlock(); 2490 auto AfterExitIter = Exit->getIterator(); 2491 AfterExitIter++; 2492 MF->insert(AfterExitIter, NewExit); 2493 Exit->removeSuccessor(Succ); 2494 Exit->addSuccessor(NewExit); 2495 NewExit->addSuccessor(Succ); 2496 insertUnconditionalBranch(NewExit, Succ); 2497 LRegion->addMBB(NewExit); 2498 LRegion->setExit(NewExit); 2499 2500 LLVM_DEBUG(dbgs() << "Created new exit block: " << NewExit->getNumber() 2501 << "\n"); 2502 2503 // Replace any PHI Predecessors in the successor with NewExit 2504 for (auto &II : *Succ) { 2505 MachineInstr &Instr = II; 2506 2507 // If we are past the PHI instructions we are done 2508 if (!Instr.isPHI()) 2509 break; 2510 2511 int numPreds = getPHINumInputs(Instr); 2512 for (int i = 0; i < numPreds; ++i) { 2513 auto Pred = getPHIPred(Instr, i); 2514 if (Pred == Exit) { 2515 setPhiPred(Instr, i, NewExit); 2516 } 2517 } 2518 } 2519 2520 return NewExit; 2521 } 2522 2523 static MachineBasicBlock *split(MachineBasicBlock::iterator I) { 2524 // Create the fall-through block. 2525 MachineBasicBlock *MBB = (*I).getParent(); 2526 MachineFunction *MF = MBB->getParent(); 2527 MachineBasicBlock *SuccMBB = MF->CreateMachineBasicBlock(); 2528 auto MBBIter = ++(MBB->getIterator()); 2529 MF->insert(MBBIter, SuccMBB); 2530 SuccMBB->transferSuccessorsAndUpdatePHIs(MBB); 2531 MBB->addSuccessor(SuccMBB); 2532 2533 // Splice the code over. 2534 SuccMBB->splice(SuccMBB->end(), MBB, I, MBB->end()); 2535 2536 return SuccMBB; 2537 } 2538 2539 // Split the entry block separating PHI-nodes and the rest of the code 2540 // This is needed to insert an initializer for the bb select register 2541 // inloop regions. 2542 2543 MachineBasicBlock * 2544 AMDGPUMachineCFGStructurizer::splitEntry(LinearizedRegion *LRegion) { 2545 MachineBasicBlock *Entry = LRegion->getEntry(); 2546 MachineBasicBlock *EntrySucc = split(Entry->getFirstNonPHI()); 2547 MachineBasicBlock *Exit = LRegion->getExit(); 2548 2549 LLVM_DEBUG(dbgs() << "Split " << printMBBReference(*Entry) << " to " 2550 << printMBBReference(*Entry) << " -> " 2551 << printMBBReference(*EntrySucc) << "\n"); 2552 LRegion->addMBB(EntrySucc); 2553 2554 // Make the backedge go to Entry Succ 2555 if (Exit->isSuccessor(Entry)) { 2556 Exit->removeSuccessor(Entry); 2557 } 2558 Exit->addSuccessor(EntrySucc); 2559 MachineInstr &Branch = *(Exit->instr_rbegin()); 2560 for (auto &UI : Branch.uses()) { 2561 if (UI.isMBB() && UI.getMBB() == Entry) { 2562 UI.setMBB(EntrySucc); 2563 } 2564 } 2565 2566 splitLoopPHIs(Entry, EntrySucc, LRegion); 2567 2568 return EntrySucc; 2569 } 2570 2571 LinearizedRegion * 2572 AMDGPUMachineCFGStructurizer::initLinearizedRegion(RegionMRT *Region) { 2573 LinearizedRegion *LRegion = Region->getLinearizedRegion(); 2574 LRegion->initLiveOut(Region, MRI, TRI, PHIInfo); 2575 LRegion->setEntry(Region->getEntry()); 2576 return LRegion; 2577 } 2578 2579 static void removeOldExitPreds(RegionMRT *Region) { 2580 MachineBasicBlock *Exit = Region->getSucc(); 2581 if (Exit == nullptr) { 2582 return; 2583 } 2584 for (MachineBasicBlock::pred_iterator PI = Exit->pred_begin(), 2585 E = Exit->pred_end(); 2586 PI != E; ++PI) { 2587 if (Region->contains(*PI)) { 2588 (*PI)->removeSuccessor(Exit); 2589 } 2590 } 2591 } 2592 2593 static bool mbbHasBackEdge(MachineBasicBlock *MBB, 2594 SmallPtrSet<MachineBasicBlock *, 8> &MBBs) { 2595 for (auto SI = MBB->succ_begin(), SE = MBB->succ_end(); SI != SE; ++SI) { 2596 if (MBBs.count(*SI) != 0) { 2597 return true; 2598 } 2599 } 2600 return false; 2601 } 2602 2603 static bool containsNewBackedge(MRT *Tree, 2604 SmallPtrSet<MachineBasicBlock *, 8> &MBBs) { 2605 // Need to traverse this in reverse since it is in post order. 2606 if (Tree == nullptr) 2607 return false; 2608 2609 if (Tree->isMBB()) { 2610 MachineBasicBlock *MBB = Tree->getMBBMRT()->getMBB(); 2611 MBBs.insert(MBB); 2612 if (mbbHasBackEdge(MBB, MBBs)) { 2613 return true; 2614 } 2615 } else { 2616 RegionMRT *Region = Tree->getRegionMRT(); 2617 SetVector<MRT *> *Children = Region->getChildren(); 2618 for (auto CI = Children->rbegin(), CE = Children->rend(); CI != CE; ++CI) { 2619 if (containsNewBackedge(*CI, MBBs)) 2620 return true; 2621 } 2622 } 2623 return false; 2624 } 2625 2626 static bool containsNewBackedge(RegionMRT *Region) { 2627 SmallPtrSet<MachineBasicBlock *, 8> MBBs; 2628 return containsNewBackedge(Region, MBBs); 2629 } 2630 2631 bool AMDGPUMachineCFGStructurizer::structurizeComplexRegion(RegionMRT *Region) { 2632 auto *LRegion = initLinearizedRegion(Region); 2633 LRegion->setHasLoop(containsNewBackedge(Region)); 2634 MachineBasicBlock *LastMerge = createLinearizedExitBlock(Region); 2635 MachineBasicBlock *CurrentMerge = LastMerge; 2636 LRegion->addMBB(LastMerge); 2637 LRegion->setExit(LastMerge); 2638 2639 rewriteRegionExitPHIs(Region, LastMerge, LRegion); 2640 removeOldExitPreds(Region); 2641 2642 LLVM_DEBUG(PHIInfo.dump(MRI)); 2643 2644 SetVector<MRT *> *Children = Region->getChildren(); 2645 LLVM_DEBUG(dbgs() << "===========If Region Start===============\n"); 2646 if (LRegion->getHasLoop()) { 2647 LLVM_DEBUG(dbgs() << "Has Backedge: Yes\n"); 2648 } else { 2649 LLVM_DEBUG(dbgs() << "Has Backedge: No\n"); 2650 } 2651 2652 unsigned BBSelectRegIn; 2653 unsigned BBSelectRegOut; 2654 for (auto CI = Children->begin(), CE = Children->end(); CI != CE; ++CI) { 2655 LLVM_DEBUG(dbgs() << "CurrentRegion: \n"); 2656 LLVM_DEBUG(LRegion->print(dbgs(), TRI)); 2657 2658 auto CNI = CI; 2659 ++CNI; 2660 2661 MRT *Child = (*CI); 2662 2663 if (Child->isRegion()) { 2664 2665 LinearizedRegion *InnerLRegion = 2666 Child->getRegionMRT()->getLinearizedRegion(); 2667 // We found the block is the exit of an inner region, we need 2668 // to put it in the current linearized region. 2669 2670 LLVM_DEBUG(dbgs() << "Linearizing region: "); 2671 LLVM_DEBUG(InnerLRegion->print(dbgs(), TRI)); 2672 LLVM_DEBUG(dbgs() << "\n"); 2673 2674 MachineBasicBlock *InnerEntry = InnerLRegion->getEntry(); 2675 if ((&(*(InnerEntry->getParent()->begin()))) == InnerEntry) { 2676 // Entry has already been linearized, no need to do this region. 2677 unsigned OuterSelect = InnerLRegion->getBBSelectRegOut(); 2678 unsigned InnerSelectReg = 2679 InnerLRegion->getRegionMRT()->getInnerOutputRegister(); 2680 replaceRegisterWith(InnerSelectReg, OuterSelect), 2681 resolvePHIInfos(InnerEntry); 2682 if (!InnerLRegion->getExit()->isSuccessor(CurrentMerge)) 2683 InnerLRegion->getExit()->addSuccessor(CurrentMerge); 2684 continue; 2685 } 2686 2687 BBSelectRegOut = Child->getBBSelectRegOut(); 2688 BBSelectRegIn = Child->getBBSelectRegIn(); 2689 2690 LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI) 2691 << "\n"); 2692 LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI) 2693 << "\n"); 2694 2695 MachineBasicBlock *IfEnd = CurrentMerge; 2696 CurrentMerge = createIfRegion(CurrentMerge, InnerLRegion, LRegion, 2697 Child->getRegionMRT()->getEntry(), 2698 BBSelectRegIn, BBSelectRegOut); 2699 TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); 2700 } else { 2701 MachineBasicBlock *MBB = Child->getMBBMRT()->getMBB(); 2702 LLVM_DEBUG(dbgs() << "Linearizing block: " << MBB->getNumber() << "\n"); 2703 2704 if (MBB == getSingleExitNode(*(MBB->getParent()))) { 2705 // If this is the exit block then we need to skip to the next. 2706 // The "in" register will be transferred to "out" in the next 2707 // iteration. 2708 continue; 2709 } 2710 2711 BBSelectRegOut = Child->getBBSelectRegOut(); 2712 BBSelectRegIn = Child->getBBSelectRegIn(); 2713 2714 LLVM_DEBUG(dbgs() << "BBSelectRegIn: " << printReg(BBSelectRegIn, TRI) 2715 << "\n"); 2716 LLVM_DEBUG(dbgs() << "BBSelectRegOut: " << printReg(BBSelectRegOut, TRI) 2717 << "\n"); 2718 2719 MachineBasicBlock *IfEnd = CurrentMerge; 2720 // This is a basic block that is not part of an inner region, we 2721 // need to put it in the current linearized region. 2722 CurrentMerge = createIfRegion(CurrentMerge, MBB, LRegion, BBSelectRegIn, 2723 BBSelectRegOut); 2724 if (CurrentMerge) { 2725 TII->convertNonUniformIfRegion(CurrentMerge, IfEnd); 2726 } 2727 2728 LLVM_DEBUG(PHIInfo.dump(MRI)); 2729 } 2730 } 2731 2732 LRegion->removeFalseRegisterKills(MRI); 2733 2734 if (LRegion->getHasLoop()) { 2735 MachineBasicBlock *NewSucc = splitEntry(LRegion); 2736 if (isFunctionEntryBlock(LRegion->getEntry())) { 2737 resolvePHIInfos(LRegion->getEntry()); 2738 } 2739 const DebugLoc &DL = NewSucc->findDebugLoc(NewSucc->getFirstNonPHI()); 2740 unsigned InReg = LRegion->getBBSelectRegIn(); 2741 Register InnerSelectReg = 2742 MRI->createVirtualRegister(MRI->getRegClass(InReg)); 2743 Register NewInReg = MRI->createVirtualRegister(MRI->getRegClass(InReg)); 2744 TII->materializeImmediate(*(LRegion->getEntry()), 2745 LRegion->getEntry()->getFirstTerminator(), DL, 2746 NewInReg, Region->getEntry()->getNumber()); 2747 // Need to be careful about updating the registers inside the region. 2748 LRegion->replaceRegisterInsideRegion(InReg, InnerSelectReg, false, MRI); 2749 LLVM_DEBUG(dbgs() << "Loop BBSelect Merge PHI:\n"); 2750 insertMergePHI(LRegion->getEntry(), LRegion->getExit(), NewSucc, 2751 InnerSelectReg, NewInReg, 2752 LRegion->getRegionMRT()->getInnerOutputRegister()); 2753 splitExit(LRegion); 2754 TII->convertNonUniformLoopRegion(NewSucc, LastMerge); 2755 } 2756 2757 if (Region->isRoot()) { 2758 TII->insertReturn(*LastMerge); 2759 } 2760 2761 LLVM_DEBUG(Region->getEntry()->getParent()->dump()); 2762 LLVM_DEBUG(LRegion->print(dbgs(), TRI)); 2763 LLVM_DEBUG(PHIInfo.dump(MRI)); 2764 2765 LLVM_DEBUG(dbgs() << "===========If Region End===============\n"); 2766 2767 Region->setLinearizedRegion(LRegion); 2768 return true; 2769 } 2770 2771 bool AMDGPUMachineCFGStructurizer::structurizeRegion(RegionMRT *Region) { 2772 if (false && regionIsSimpleIf(Region)) { 2773 transformSimpleIfRegion(Region); 2774 return true; 2775 } else if (regionIsSequence(Region)) { 2776 fixupRegionExits(Region); 2777 return false; 2778 } else { 2779 structurizeComplexRegion(Region); 2780 } 2781 return false; 2782 } 2783 2784 static int structurize_once = 0; 2785 2786 bool AMDGPUMachineCFGStructurizer::structurizeRegions(RegionMRT *Region, 2787 bool isTopRegion) { 2788 bool Changed = false; 2789 2790 auto Children = Region->getChildren(); 2791 for (auto CI : *Children) { 2792 if (CI->isRegion()) { 2793 Changed |= structurizeRegions(CI->getRegionMRT(), false); 2794 } 2795 } 2796 2797 if (structurize_once < 2 || true) { 2798 Changed |= structurizeRegion(Region); 2799 structurize_once++; 2800 } 2801 return Changed; 2802 } 2803 2804 void AMDGPUMachineCFGStructurizer::initFallthroughMap(MachineFunction &MF) { 2805 LLVM_DEBUG(dbgs() << "Fallthrough Map:\n"); 2806 for (auto &MBBI : MF) { 2807 MachineBasicBlock *MBB = MBBI.getFallThrough(); 2808 if (MBB != nullptr) { 2809 LLVM_DEBUG(dbgs() << "Fallthrough: " << MBBI.getNumber() << " -> " 2810 << MBB->getNumber() << "\n"); 2811 } 2812 FallthroughMap[&MBBI] = MBB; 2813 } 2814 } 2815 2816 void AMDGPUMachineCFGStructurizer::createLinearizedRegion(RegionMRT *Region, 2817 unsigned SelectOut) { 2818 LinearizedRegion *LRegion = new LinearizedRegion(); 2819 if (SelectOut) { 2820 LRegion->addLiveOut(SelectOut); 2821 LLVM_DEBUG(dbgs() << "Add LiveOut (BBSelect): " << printReg(SelectOut, TRI) 2822 << "\n"); 2823 } 2824 LRegion->setRegionMRT(Region); 2825 Region->setLinearizedRegion(LRegion); 2826 LRegion->setParent(Region->getParent() 2827 ? Region->getParent()->getLinearizedRegion() 2828 : nullptr); 2829 } 2830 2831 unsigned 2832 AMDGPUMachineCFGStructurizer::initializeSelectRegisters(MRT *MRT, unsigned SelectOut, 2833 MachineRegisterInfo *MRI, 2834 const SIInstrInfo *TII) { 2835 if (MRT->isRegion()) { 2836 RegionMRT *Region = MRT->getRegionMRT(); 2837 Region->setBBSelectRegOut(SelectOut); 2838 unsigned InnerSelectOut = createBBSelectReg(TII, MRI); 2839 2840 // Fixme: Move linearization creation to the original spot 2841 createLinearizedRegion(Region, SelectOut); 2842 2843 for (auto CI = Region->getChildren()->begin(), 2844 CE = Region->getChildren()->end(); 2845 CI != CE; ++CI) { 2846 InnerSelectOut = 2847 initializeSelectRegisters((*CI), InnerSelectOut, MRI, TII); 2848 } 2849 MRT->setBBSelectRegIn(InnerSelectOut); 2850 return InnerSelectOut; 2851 } else { 2852 MRT->setBBSelectRegOut(SelectOut); 2853 unsigned NewSelectIn = createBBSelectReg(TII, MRI); 2854 MRT->setBBSelectRegIn(NewSelectIn); 2855 return NewSelectIn; 2856 } 2857 } 2858 2859 static void checkRegOnlyPHIInputs(MachineFunction &MF) { 2860 for (auto &MBBI : MF) { 2861 for (MachineBasicBlock::instr_iterator I = MBBI.instr_begin(), 2862 E = MBBI.instr_end(); 2863 I != E; ++I) { 2864 MachineInstr &Instr = *I; 2865 if (Instr.isPHI()) { 2866 int numPreds = getPHINumInputs(Instr); 2867 for (int i = 0; i < numPreds; ++i) { 2868 assert(Instr.getOperand(i * 2 + 1).isReg() && 2869 "PHI Operand not a register"); 2870 } 2871 } 2872 } 2873 } 2874 } 2875 2876 bool AMDGPUMachineCFGStructurizer::runOnMachineFunction(MachineFunction &MF) { 2877 const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>(); 2878 const SIInstrInfo *TII = ST.getInstrInfo(); 2879 TRI = ST.getRegisterInfo(); 2880 MRI = &(MF.getRegInfo()); 2881 initFallthroughMap(MF); 2882 2883 checkRegOnlyPHIInputs(MF); 2884 LLVM_DEBUG(dbgs() << "----STRUCTURIZER START----\n"); 2885 LLVM_DEBUG(MF.dump()); 2886 2887 Regions = &(getAnalysis<MachineRegionInfoPass>().getRegionInfo()); 2888 LLVM_DEBUG(Regions->dump()); 2889 2890 RegionMRT *RTree = MRT::buildMRT(MF, Regions, TII, MRI); 2891 setRegionMRT(RTree); 2892 initializeSelectRegisters(RTree, 0, MRI, TII); 2893 LLVM_DEBUG(RTree->dump(TRI)); 2894 bool result = structurizeRegions(RTree, true); 2895 delete RTree; 2896 LLVM_DEBUG(dbgs() << "----STRUCTURIZER END----\n"); 2897 initFallthroughMap(MF); 2898 return result; 2899 } 2900 2901 char AMDGPUMachineCFGStructurizerID = AMDGPUMachineCFGStructurizer::ID; 2902 2903 INITIALIZE_PASS_BEGIN(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", 2904 "AMDGPU Machine CFG Structurizer", false, false) 2905 INITIALIZE_PASS_DEPENDENCY(MachineRegionInfoPass) 2906 INITIALIZE_PASS_END(AMDGPUMachineCFGStructurizer, "amdgpu-machine-cfg-structurizer", 2907 "AMDGPU Machine CFG Structurizer", false, false) 2908 2909 FunctionPass *llvm::createAMDGPUMachineCFGStructurizerPass() { 2910 return new AMDGPUMachineCFGStructurizer(); 2911 } 2912