1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 /// \file 9 //==-----------------------------------------------------------------------===// 10 11 #include "AMDGPU.h" 12 #include "AMDGPUSubtarget.h" 13 #include "R600InstrInfo.h" 14 #include "R600RegisterInfo.h" 15 #include "llvm/ADT/DepthFirstIterator.h" 16 #include "llvm/ADT/SCCIterator.h" 17 #include "llvm/ADT/SmallPtrSet.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/ADT/StringRef.h" 21 #include "llvm/CodeGen/MachineBasicBlock.h" 22 #include "llvm/CodeGen/MachineDominators.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineFunctionPass.h" 25 #include "llvm/CodeGen/MachineInstr.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineJumpTableInfo.h" 28 #include "llvm/CodeGen/MachineLoopInfo.h" 29 #include "llvm/CodeGen/MachineOperand.h" 30 #include "llvm/CodeGen/MachinePostDominators.h" 31 #include "llvm/CodeGen/MachineRegisterInfo.h" 32 #include "llvm/CodeGen/MachineValueType.h" 33 #include "llvm/IR/DebugLoc.h" 34 #include "llvm/IR/LLVMContext.h" 35 #include "llvm/Pass.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/ErrorHandling.h" 38 #include "llvm/Support/raw_ostream.h" 39 #include <cassert> 40 #include <cstddef> 41 #include <deque> 42 #include <iterator> 43 #include <map> 44 #include <utility> 45 #include <vector> 46 47 using namespace llvm; 48 49 #define DEBUG_TYPE "structcfg" 50 51 #define DEFAULT_VEC_SLOTS 8 52 53 // TODO: move-begin. 54 55 //===----------------------------------------------------------------------===// 56 // 57 // Statistics for CFGStructurizer. 
//
//===----------------------------------------------------------------------===//

STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
    "matched");
STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
    "matched");
STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");

namespace llvm {

void initializeAMDGPUCFGStructurizerPass(PassRegistry&);

} // end namespace llvm

namespace {

//===----------------------------------------------------------------------===//
//
// Miscellaneous utility for CFGStructurizer.
//
//===----------------------------------------------------------------------===//

#define SHOWNEWINSTR(i) \
  DEBUG(dbgs() << "New instr: " << *i << "\n");

#define SHOWNEWBLK(b, msg) \
  DEBUG( \
    dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
    dbgs() << "\n"; \
  );

#define SHOWBLK_DETAIL(b, msg) \
  DEBUG( \
    if (b) { \
      dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
      b->print(dbgs()); \
      dbgs() << "\n"; \
    } \
  );

// Sentinel SCC number for blocks that were never recorded by orderBlocks()
// (i.e. unreachable blocks).
#define INVALIDSCCNUM -1

//===----------------------------------------------------------------------===//
//
// supporting data structure for CFGStructurizer
//
//===----------------------------------------------------------------------===//

/// Per-MachineBasicBlock bookkeeping for the structurizer.
class BlockInformation {
public:
  // True once the block has been merged away by a pattern match; retired
  // blocks are skipped during matching and erased from the function at the
  // end of run().
  bool IsRetired = false;
  // SCC number assigned by orderBlocks(); INVALIDSCCNUM for unreachable
  // blocks.
  int SccNum = INVALIDSCCNUM;

  BlockInformation() = default;
};

//===----------------------------------------------------------------------===//
//
// CFGStructurizer
//
//===----------------------------------------------------------------------===//

/// Machine-function pass that reduces the CFG to structured control flow
/// (serial, if-then-else and loop regions) by repeatedly matching patterns
/// and merging blocks until the function collapses to a single block.
class AMDGPUCFGStructurizer : public MachineFunctionPass {
public:
  typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
  typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
  typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;

  // Classification returned by singlePathTo().
  enum PathToKind {
    Not_SinglePath = 0,
    SinglePath_InPath = 1,
    SinglePath_NotInPath = 2
  };

  static char ID;

  AMDGPUCFGStructurizer() : MachineFunctionPass(ID) {
    initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override {
    return "AMDGPU Control Flow Graph structurizer Pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addRequired<MachinePostDominatorTree>();
    AU.addRequired<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  /// Perform the CFG structurization
  bool run();

  /// Perform the CFG preparation
  /// This step will remove every unconditional/dead jump instructions and make
  /// sure all loops have an exit block
  bool prepare();

  bool runOnMachineFunction(MachineFunction &MF) override {
    TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
    TRI = &TII->getRegisterInfo();
    DEBUG(MF.dump(););
    // Reset per-function state; BlockInfoMap/LLInfoMap are cleared at the
    // end of run().
    OrderedBlks.clear();
    Visited.clear();
    FuncRep = &MF;
    MLI = &getAnalysis<MachineLoopInfo>();
    DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
    MDT = &getAnalysis<MachineDominatorTree>();
    DEBUG(MDT->print(dbgs(), (const Module*)nullptr););
    PDT = &getAnalysis<MachinePostDominatorTree>();
    DEBUG(PDT->print(dbgs()););
    prepare();
    run();
    DEBUG(MF.dump(););
    return true;
  }

protected:
  MachineDominatorTree *MDT;
  MachinePostDominatorTree *PDT;
  MachineLoopInfo *MLI;
  const R600InstrInfo *TII = nullptr;
  const R600RegisterInfo *TRI = nullptr;

  // PRINT FUNCTIONS
  /// Print the ordered Blocks.
  void printOrderedBlocks() const {
    size_t i = 0;
    for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
        iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
      dbgs() << "BB" << (*iterBlk)->getNumber();
      dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
      if (i != 0 && i % 10 == 0) {
        dbgs() << "\n";
      } else {
        dbgs() << " ";
      }
    }
  }

  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
    for (MachineLoop::iterator iter = LoopInfo.begin(),
         iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
      (*iter)->print(dbgs(), 0);
    }
  }

  // UTILITY FUNCTIONS
  int getSCCNum(MachineBasicBlock *MBB) const;
  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
  bool hasBackEdge(MachineBasicBlock *MBB) const;
  bool isRetiredBlock(MachineBasicBlock *MBB) const;
  bool isActiveLoophead(MachineBasicBlock *MBB) const;
  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
      bool AllowSideEntry = true) const;
  int countActiveBlock(MBBVector::const_iterator It,
      MBBVector::const_iterator E) const;
  bool needMigrateBlock(MachineBasicBlock *MBB) const;

  // Utility Functions
  void reversePredicateSetter(MachineBasicBlock::iterator I,
                              MachineBasicBlock &MBB);
  /// Compute the reversed DFS post order of Blocks
  void orderBlocks(MachineFunction *MF);

  // Function originally from CFGStructTraits
  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
                      const DebugLoc &DL = DebugLoc());
  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
                                  const DebugLoc &DL = DebugLoc());
  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
                              const DebugLoc &DL);
  void insertCondBranchBefore(MachineBasicBlock *MBB,
                              MachineBasicBlock::iterator I, int NewOpcode,
                              int RegNum, const DebugLoc &DL);

  static int getBranchNzeroOpcode(int OldOpcode);
  static int getBranchZeroOpcode(int OldOpcode);
  static int getContinueNzeroOpcode(int OldOpcode);
  static int getContinueZeroOpcode(int OldOpcode);
  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
      MachineInstr *MI);
  static bool isCondBranch(MachineInstr *MI);
  static bool isUncondBranch(MachineInstr *MI);
  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);

  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
  ///
  /// BB with backward-edge could have move instructions after the branch
  /// instruction.  Such move instruction "belong to" the loop backward-edge.
  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);

  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
  static bool isReturnBlock(MachineBasicBlock *MBB);
  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
      MachineBasicBlock *SrcMBB);
  static MachineBasicBlock *clone(MachineBasicBlock *MBB);

  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
  /// because the AMDGPU instruction is not recognized as terminator fix this
  /// and retire this routine
  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
      MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);

  static void wrapup(MachineBasicBlock *MBB);

  int patternMatch(MachineBasicBlock *MBB);
  int patternMatchGroup(MachineBasicBlock *MBB);
  int serialPatternMatch(MachineBasicBlock *MBB);
  int ifPatternMatch(MachineBasicBlock *MBB);
  int loopendPatternMatch();
  int mergeLoop(MachineLoop *LoopRep);

  /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
  /// the same loop with LoopLandInfo without explicitly keeping track of
  /// loopContBlks and loopBreakBlks, this is a method to get the information.
  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
      MachineBasicBlock *Src2MBB);
  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
      MachineBasicBlock **LandMBBPtr);
  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
      MachineBasicBlock *LandMBB, bool Detail = false);
  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
      MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
  void mergeSerialBlock(MachineBasicBlock *DstMBB,
      MachineBasicBlock *SrcMBB);

  void mergeIfthenelseBlock(MachineInstr *BranchMI,
      MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
      MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
      MachineBasicBlock *LandMBB);
  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
      MachineBasicBlock *LandMBB);
  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
      MachineBasicBlock *ContMBB);

  /// normalizeInfiniteLoopExit change
  ///   B1:
  ///        uncond_br LoopHeader
  ///
  /// to
  ///   B1:
  ///        cond_br 1 LoopHeader dummyExit
  /// and return the newly added dummy exit block
  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);

  void removeUnconditionalBranch(MachineBasicBlock *MBB);

  /// Remove duplicate branches instructions in a block.
  /// For instance
  /// B0:
  ///    cond_br X B1 B2
  ///    cond_br X B1 B2
  /// is transformed to
  /// B0:
  ///    cond_br X B1 B2
  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);

  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
  void removeSuccessor(MachineBasicBlock *MBB);
  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
      MachineBasicBlock *PredMBB);
  void migrateInstruction(MachineBasicBlock *SrcMBB,
      MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
  void retireBlock(MachineBasicBlock *MBB);

private:
  // Owned BlockInformation entries, keyed by block; deleted in run().
  MBBInfoMap BlockInfoMap;
  // Loop -> landing (exit) block, filled in by the loop-merging code.
  LoopLandInfoMap LLInfoMap;
  // Loops already processed by mergeLoop().
  std::map<MachineLoop *, bool> Visited;
  MachineFunction *FuncRep;
  // Blocks in reversed DFS post order (SCC order), see orderBlocks().
  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
};

char AMDGPUCFGStructurizer::ID = 0;

} // end anonymous namespace

// Return the SCC number recorded for \p MBB, or INVALIDSCCNUM if the block
// was never visited by orderBlocks().
int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
  if (It == BlockInfoMap.end())
    return INVALIDSCCNUM;
  return (*It).second->SccNum;
}

// Return the landing block recorded for \p LoopRep, or nullptr if none has
// been recorded yet.
MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
    const {
  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
  if (It == LLInfoMap.end())
    return nullptr;
  return (*It).second;
}

// True iff \p MBB branches back to the header of its innermost loop.
bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
  if (!LoopRep)
    return false;
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
  return MBB->isSuccessor(LoopHeader);
}

bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
  if (It == BlockInfoMap.end())
    return false;
  return (*It).second->IsRetired;
}

// A loop head is "active" while any enclosing loop headed at \p MBB still has
// no landing block, or a landing block that has not been retired yet.
bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
  while (LoopRep && LoopRep->getHeader() == MBB) {
    MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
    if(!LoopLand)
      return true;
    if (!isRetiredBlock(LoopLand))
      return true;
    LoopRep = LoopRep->getParentLoop();
  }
  return false;
}

// Walk the chain of single-successor blocks starting at \p SrcMBB and
// classify whether it reaches \p DstMBB (InPath), dead-ends (NotInPath), or
// is not a single path at all.  With AllowSideEntry == false, any block on
// the chain with multiple predecessors disqualifies the path.
AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
    bool AllowSideEntry) const {
  assert(DstMBB);
  if (SrcMBB == DstMBB)
    return SinglePath_InPath;
  while (SrcMBB && SrcMBB->succ_size() == 1) {
    SrcMBB = *SrcMBB->succ_begin();
    if (SrcMBB == DstMBB)
      return SinglePath_InPath;
    if (!AllowSideEntry && SrcMBB->pred_size() > 1)
      return Not_SinglePath;
  }
  if (SrcMBB && SrcMBB->succ_size()==0)
    return SinglePath_NotInPath;
  return Not_SinglePath;
}

// Count the blocks in [It, E) that have not been retired.
int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
    MBBVector::const_iterator E) const {
  int Count = 0;
  while (It != E) {
    if (!isRetiredBlock(*It))
      ++Count;
    ++It;
  }
  return Count;
}

// Heuristic: a multi-predecessor block is worth migrating (rather than
// cloning per predecessor) when it is large enough that cloning would
// duplicate too many instructions.
bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
  unsigned BlockSizeThreshold = 30;
  unsigned CloneInstrThreshold = 100;
  bool MultiplePreds = MBB && (MBB->pred_size() > 1);

  if(!MultiplePreds)
    return false;
  unsigned BlkSize = MBB->size();
  return ((BlkSize > BlockSizeThreshold) &&
      (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
}

// Scan backwards from \p I for the PRED_X instruction that sets the
// predicate and invert its compare (SETE <-> SETNE).  Asserts (via
// llvm_unreachable) on any other PRED_X compare opcode.
// NOTE(review): the loop walks backwards without a begin() guard — it
// presumably relies on a PRED_X always existing before I in this MBB.
void AMDGPUCFGStructurizer::reversePredicateSetter(
    MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
  assert(I.isValid() && "Expected valid iterator");
  for (;; --I) {
    if (I == MBB.end())
      continue;
    if (I->getOpcode() == AMDGPU::PRED_X) {
      switch (I->getOperand(2).getImm()) {
      case AMDGPU::PRED_SETE_INT:
        I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
        return;
      case AMDGPU::PRED_SETNE_INT:
        I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
        return;
      case AMDGPU::PRED_SETE:
        I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
        return;
      case AMDGPU::PRED_SETNE:
        I->getOperand(2).setImm(AMDGPU::PRED_SETE);
        return;
      default:
        llvm_unreachable("PRED_X Opcode invalid!");
      }
    }
  }
}

// Append a new (operand-less) instruction of \p NewOpcode to \p MBB.
void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
                                           int NewOpcode, const DebugLoc &DL) {
  MachineInstr *MI =
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
  MBB->push_back(MI);
  //assume the instruction doesn't take any reg operand ...
  SHOWNEWINSTR(MI);
}

// Insert a new (operand-less) instruction of \p NewOpcode at the front of
// \p MBB.
MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
                                                       int NewOpcode,
                                                       const DebugLoc &DL) {
  MachineInstr *MI =
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
  if (MBB->begin() != MBB->end())
    MBB->insert(MBB->begin(), MI);
  else
    MBB->push_back(MI);
  SHOWNEWINSTR(MI);
  return MI;
}

// Insert a new (operand-less) instruction of \p NewOpcode just before \p I.
MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
    MachineBasicBlock::iterator I, int NewOpcode) {
  MachineInstr *OldMI = &(*I);
  MachineBasicBlock *MBB = OldMI->getParent();
  // NOTE(review): the name NewMBB is historical — this is a MachineInstr.
  MachineInstr *NewMBB =
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
  MBB->insert(I, NewMBB);
  //assume the instruction doesn't take any reg operand ...
  SHOWNEWINSTR(NewMBB);
  return NewMBB;
}

// Insert a conditional branch of \p NewOpcode before \p I, reusing the
// condition register from the old branch's operand 1.
void AMDGPUCFGStructurizer::insertCondBranchBefore(
    MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
  MachineInstr *OldMI = &(*I);
  MachineBasicBlock *MBB = OldMI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
  MBB->insert(I, NewMI);
  MachineInstrBuilder MIB(*MF, NewMI);
  MIB.addReg(OldMI->getOperand(1).getReg(), false);
  SHOWNEWINSTR(NewMI);
  //erase later oldInstr->eraseFromParent();
}

// Insert a conditional branch of \p NewOpcode on register \p RegNum before
// \p I in \p blk.
void AMDGPUCFGStructurizer::insertCondBranchBefore(
    MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode,
    int RegNum, const DebugLoc &DL) {
  MachineFunction *MF = blk->getParent();
  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
  //insert before
  blk->insert(I, NewInstr);
  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
  SHOWNEWINSTR(NewInstr);
}

// Map a branch opcode to the structured "if non-zero" opcode.
int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
  case AMDGPU::BRANCH_COND_i32:
  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
  default: llvm_unreachable("internal error");
  }
  return -1;
}

// Map a branch opcode to the structured "if zero" opcode.
int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
  case AMDGPU::BRANCH_COND_i32:
  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
  default: llvm_unreachable("internal error");
  }
  return -1;
}

// Map a branch opcode to the structured "continue if non-zero" opcode.
int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
  default: llvm_unreachable("internal error");
  };
  return -1;
}

// Map a branch opcode to the structured "continue if zero" opcode.
int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
  default: llvm_unreachable("internal error");
  }
  return -1;
}

// The taken destination of a branch is stored as operand 0.
MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
  return MI->getOperand(0).getMBB();
}

void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
    MachineBasicBlock *MBB) {
  MI->getOperand(0).setMBB(MBB);
}

// Given a two-successor block, return the successor that is NOT the branch's
// true destination.
MachineBasicBlock *
AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
    MachineInstr *MI) {
  assert(MBB->succ_size() == 2);
  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
  MachineBasicBlock::succ_iterator It = MBB->succ_begin();
  MachineBasicBlock::succ_iterator Next = It;
  ++Next;
  return (*It == TrueBranch) ? *Next : *It;
}

bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
  switch (MI->getOpcode()) {
    case AMDGPU::JUMP_COND:
    case AMDGPU::BRANCH_COND_i32:
    case AMDGPU::BRANCH_COND_f32: return true;
  default:
    return false;
  }
  return false;
}

bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case AMDGPU::JUMP:
  case AMDGPU::BRANCH:
    return true;
  default:
    return false;
  }
  return false;
}

// Return the last DebugLoc found while scanning \p MBB front to back (the
// comment below is historical; the loop keeps the last instr with a loc).
DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
  //get DebugLoc from the first MachineBasicBlock instruction with debug info
  DebugLoc DL;
  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
      ++It) {
    MachineInstr *instr = &(*It);
    if (instr->getDebugLoc())
      DL = instr->getDebugLoc();
  }
  return DL;
}

// Return the trailing branch of \p MBB if its last instruction is a
// (un)conditional branch, else nullptr.
MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
    MachineBasicBlock *MBB) {
  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
  MachineInstr *MI = &*It;
  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
    return MI;
  return nullptr;
}

// Like getNormalBlockBranchInstr, but skips trailing moves that belong to a
// loop backward-edge (see the declaration comment).
MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
    MachineBasicBlock *MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
      It != E; ++It) {
    // FIXME: Simplify
    MachineInstr *MI = &*It;
    if (MI) {
      if (isCondBranch(MI) || isUncondBranch(MI))
        return MI;
      else if (!TII->isMov(MI->getOpcode()))
        break;
    }
  }
  return nullptr;
}

// Return the trailing RETURN instruction of \p MBB, if any.
MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
  if (It != MBB->rend()) {
    MachineInstr *instr = &(*It);
    if (instr->getOpcode() == AMDGPU::RETURN)
      return instr;
  }
  return nullptr;
}

// A return block has no successors; a RETURN instruction implies that, but a
// successor-less block without RETURN is tolerated (debug-logged only).
bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
  MachineInstr *MI = getReturnInstr(MBB);
  bool IsReturn = (MBB->succ_size() == 0);
  if (MI)
    assert(IsReturn);
  else if (IsReturn)
    DEBUG(
      dbgs() << "BB" << MBB->getNumber()
             <<" is return block without RETURN instr\n";);
  return IsReturn;
}

void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
    MachineBasicBlock *SrcMBB) {
  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
       iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
    DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of
}

// Create a new block at the end of the function containing copies of all of
// \p MBB's instructions (successors are NOT cloned here).
MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
  MachineFunction *Func = MBB->getParent();
  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
  Func->push_back(NewMBB);  //insert to function
  for (const MachineInstr &It : *MBB)
    NewMBB->push_back(Func->CloneMachineInstr(&It));
  return NewMBB;
}

void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
    MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
    MachineBasicBlock *NewBlk) {
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
  if (BranchMI && isCondBranch(BranchMI) &&
      getTrueBranch(BranchMI) == OldMBB)
    setTrueBranch(BranchMI, NewBlk);
}

// Final cleanup on the (single remaining) block: delete CONTINUE
// instructions that immediately precede an ENDLOOP.
void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
  assert((!MBB->getParent()->getJumpTableInfo()
          || MBB->getParent()->getJumpTableInfo()->isEmpty())
         && "found a jump table");

   //collect continue right before endloop
   SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
   MachineBasicBlock::iterator Pre = MBB->begin();
   MachineBasicBlock::iterator E = MBB->end();
   MachineBasicBlock::iterator It = Pre;
   while (It != E) {
     if (Pre->getOpcode() == AMDGPU::CONTINUE
         && It->getOpcode() == AMDGPU::ENDLOOP)
       ContInstr.push_back(&*Pre);
     Pre = It;
     ++It;
   }

   //delete continue right before endloop
   for (unsigned i = 0; i < ContInstr.size(); ++i)
      ContInstr[i]->eraseFromParent();

   // TODO to fix up jump table so later phase won't be confused.  if
   // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
   // there isn't such an interface yet.  alternatively, replace all the other
   // blocks in the jump table with the entryBlk //}
}

// Prepare the CFG for structurization: order blocks by SCC, give infinite
// loops a dummy exit, strip unconditional/redundant branches, and funnel
// multiple returns into one dummy exit block.
bool AMDGPUCFGStructurizer::prepare() {
  bool Changed = false;

  //FIXME: if not reducible flow graph, make it so ???

  DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);

  orderBlocks(FuncRep);

  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;

  // Add an ExitBlk to loop that don't have one
  for (MachineLoopInfo::iterator It = MLI->begin(),
       E = MLI->end(); It != E; ++It) {
    MachineLoop *LoopRep = (*It);
    MBBVector ExitingMBBs;
    LoopRep->getExitingBlocks(ExitingMBBs);

    if (ExitingMBBs.size() == 0) {
      MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
      if (DummyExitBlk)
        RetBlks.push_back(DummyExitBlk);
    }
  }

  // Remove unconditional branch instr.
  // Add dummy exit block iff there are multiple returns.
  for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
       It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
    MachineBasicBlock *MBB = *It;
    removeUnconditionalBranch(MBB);
    removeRedundantConditionalBranch(MBB);
    if (isReturnBlock(MBB)) {
      RetBlks.push_back(MBB);
    }
    assert(MBB->succ_size() <= 2);
  }

  if (RetBlks.size() >= 2) {
    addDummyExitBlock(RetBlks);
    Changed = true;
  }

  return Changed;
}

// Main fixed-point driver: repeatedly sweep the ordered blocks, pattern
// matching within each SCC, until the function is reduced to one block or no
// sweep makes progress (in which case the CFG is reported irreducible).
bool AMDGPUCFGStructurizer::run() {
  //Assume reducible CFG...
  DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");

#ifdef STRESSTEST
  //Use the worse block ordering to test the algorithm.
  ReverseVector(orderedBlks);
#endif

  DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
  int NumIter = 0;
  bool Finish = false;
  MachineBasicBlock *MBB;
  bool MakeProgress = false;
  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
                                        OrderedBlks.end());

  do {
    ++NumIter;
    DEBUG(
      dbgs() << "numIter = " << NumIter
             << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
    );

    SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
        OrderedBlks.begin();
    SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
        OrderedBlks.end();

    SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
        It;
    MachineBasicBlock *SccBeginMBB = nullptr;
    int SccNumBlk = 0;  // The number of active blocks, init to a
                        // maximum possible number.
    // SccNumIter is initialized when the first block of each SCC is seen
    // below.
    int SccNumIter;  // Number of iteration in this SCC.

    while (It != E) {
      MBB = *It;

      if (!SccBeginMBB) {
        SccBeginIter = It;
        SccBeginMBB = MBB;
        SccNumIter = 0;
        SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
        DEBUG(
              dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
              dbgs() << "\n";
        );
      }

      if (!isRetiredBlock(MBB))
        patternMatch(MBB);

      ++It;

      bool ContNextScc = true;
      if (It == E
          || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
        // Just finish one scc.
        ++SccNumIter;
        int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
          DEBUG(
            dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
                   << ", sccNumIter = " << SccNumIter;
            dbgs() << "doesn't make any progress\n";
          );
          ContNextScc = true;
        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
          SccNumBlk = sccRemainedNumBlk;
          // Re-run this SCC from its first block since it shrank.
          It = SccBeginIter;
          ContNextScc = false;
          DEBUG(
            dbgs() << "repeat processing SCC" << getSCCNum(MBB)
                   << "sccNumIter = " << SccNumIter << '\n';
          );
        } else {
          // Finish the current scc.
          ContNextScc = true;
        }
      } else {
        // Continue on next component in the current scc.
        ContNextScc = false;
      }

      if (ContNextScc)
        SccBeginMBB = nullptr;
    } //while, "one iteration" over the function.

    MachineBasicBlock *EntryMBB =
        *GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
    if (EntryMBB->succ_size() == 0) {
      Finish = true;
      DEBUG(
        dbgs() << "Reduce to one block\n";
      );
    } else {
      int NewnumRemainedBlk
        = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
      // consider cloned blocks ??
      if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
        MakeProgress = true;
        NumRemainedBlk = NewnumRemainedBlk;
      } else {
        MakeProgress = false;
        DEBUG(
          dbgs() << "No progress\n";
        );
      }
    }
  } while (!Finish && MakeProgress);

  // Misc wrap up to maintain the consistency of the Function representation.
  wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));

  // Detach retired Block, release memory.
  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
      It != E; ++It) {
    if ((*It).second && (*It).second->IsRetired) {
      assert(((*It).first)->getNumber() != -1);
      DEBUG(
        dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
      );
      (*It).first->eraseFromParent();  //Remove from the parent Function.
    }
    delete (*It).second;
  }
  BlockInfoMap.clear();
  LLInfoMap.clear();

  if (!Finish) {
    DEBUG(FuncRep->viewCFG());
    report_fatal_error("IRREDUCIBLE_CFG");
  }

  return true;
}

// Record each block's SCC number and append blocks to OrderedBlks in SCC
// iteration order; warn about unreachable blocks afterwards.
void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
  int SccNum = 0;
  MachineBasicBlock *MBB;
  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
       ++It, ++SccNum) {
    const std::vector<MachineBasicBlock *> &SccNext = *It;
    for (std::vector<MachineBasicBlock *>::const_iterator
         blockIter = SccNext.begin(), blockEnd = SccNext.end();
         blockIter != blockEnd; ++blockIter) {
      MBB = *blockIter;
      OrderedBlks.push_back(MBB);
      recordSccnum(MBB, SccNum);
    }
  }

  // walk through all the block in func to check for unreachable
  for (auto *MBB : nodes(MF)) {
    SccNum = getSCCNum(MBB);
    if (SccNum == INVALIDSCCNUM)
      dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
  }
}

// Keep applying the pattern group to \p MBB until it stops matching; return
// the total number of matches.
int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
  int NumMatch = 0;
  int CurMatch;

  DEBUG(
        dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
  );

  while ((CurMatch = patternMatchGroup(MBB)) > 0)
    NumMatch += CurMatch;

  DEBUG(
        dbgs() << "End patternMatch BB" << MBB->getNumber()
      << ", numMatch = " << NumMatch << "\n";
  );

  return NumMatch;
}

// One round of pattern matching: loop-ends first, then serial, then if.
int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
  int NumMatch = 0;
  NumMatch += loopendPatternMatch();
  NumMatch += serialPatternMatch(MBB);
  NumMatch += ifPatternMatch(MBB);
  return NumMatch;
}

// Serial pattern: MBB has a unique successor whose only predecessor is MBB
// (and which is not an active loop head) — merge the child into MBB.
int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
  if (MBB->succ_size() != 1)
    return 0;

  MachineBasicBlock *childBlk = *MBB->succ_begin();
  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
    return 0;

  mergeSerialBlock(MBB, childBlk);
  ++numSerialPatternMatch;
  return 1;
}

// If pattern: MBB ends with a conditional branch to TrueMBB/FalseMBB.  First
// reduce both arms recursively, then recognize diamond/triangle shapes (or
// detached continue/break arms inside a loop), cloning multi-predecessor
// arms when needed, and merge everything into a structured if-then-else.
int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
  //two edges
  if (MBB->succ_size() != 2)
    return 0;
  if (hasBackEdge(MBB))
    return 0;
  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
  if (!BranchMI)
    return 0;

  assert(isCondBranch(BranchMI));
  int NumMatch = 0;

  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
  NumMatch += serialPatternMatch(TrueMBB);
  NumMatch += ifPatternMatch(TrueMBB);
  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
  NumMatch += serialPatternMatch(FalseMBB);
  NumMatch += ifPatternMatch(FalseMBB);
  MachineBasicBlock *LandBlk;
  int Cloned = 0;

  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
  // TODO: Simplify
  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
    && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
    // Diamond pattern
    LandBlk = *TrueMBB->succ_begin();
  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
    // Triangle pattern, false is empty
    LandBlk = FalseMBB;
    FalseMBB = nullptr;
  } else if (FalseMBB->succ_size() == 1
             && *FalseMBB->succ_begin() == TrueMBB) {
    // Triangle pattern, true is empty
    // We reverse the predicate to make a triangle, empty false pattern;
    std::swap(TrueMBB, FalseMBB);
    reversePredicateSetter(MBB->end(), *MBB);
    LandBlk = FalseMBB;
    FalseMBB = nullptr;
  } else if (FalseMBB->succ_size() == 1
             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
    LandBlk = *FalseMBB->succ_begin();
  } else if (TrueMBB->succ_size() == 1
    && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
    LandBlk = *TrueMBB->succ_begin();
  } else {
    return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
  }

  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
  // new BB created for landBlk==NULL may introduce new challenge to the
  // reduction process.
  if (LandBlk &&
      ((TrueMBB && TrueMBB->pred_size() > 1)
      || (FalseMBB && FalseMBB->pred_size() > 1))) {
     Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
  }

  if (TrueMBB && TrueMBB->pred_size() > 1) {
    TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
    ++Cloned;
  }

  if (FalseMBB && FalseMBB->pred_size() > 1) {
    FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
    ++Cloned;
  }

  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);

  ++numIfPatternMatch;

  numClonedBlock += Cloned;

  return 1 + Cloned + NumMatch;
}

// Visit every loop in the function, outermost first, and try to merge each
// one.  Stops early if mergeLoop() signals failure (-1).
int AMDGPUCFGStructurizer::loopendPatternMatch() {
  std::deque<MachineLoop *> NestedLoops;
  for (auto &It: *MLI)
    for (MachineLoop *ML : depth_first(It))
      NestedLoops.push_front(ML);

  if (NestedLoops.empty())
    return 0;

  // Process nested loop outside->inside (we did push_front),
  // so "continue" to a outside loop won't be mistaken as "break"
  // of the current loop.
  int Num = 0;
  for (MachineLoop *ExaminedLoop : NestedLoops) {
    if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
      continue;
    DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
    int NumBreak = mergeLoop(ExaminedLoop);
    if (NumBreak == -1)
      break;
    Num += NumBreak;
  }
  return Num;
}

// Structure a single loop: rewrite every exiting edge as a break, every
// latch as a continue, reduce the header's body, then wrap header and exit
// in a loop-land construct and mark the loop as visited.
int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
  MBBVector ExitingMBBs;
  LoopRep->getExitingBlocks(ExitingMBBs);
  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
  DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
  // We assume a single ExitBlk
  MBBVector ExitBlks;
  LoopRep->getExitBlocks(ExitBlks);
  SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
    ExitBlkSet.insert(ExitBlks[i]);
  assert(ExitBlkSet.size() == 1);
  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
  assert(ExitBlk && "Loop has several exit block");
  MBBVector LatchBlks;
  for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader))
    if (LoopRep->contains(LB))
      LatchBlks.push_back(LB);

  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
    mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
    settleLoopcontBlock(LatchBlks[i], LoopHeader);
  int Match = 0;
  do {
    Match = 0;
    Match += serialPatternMatch(LoopHeader);
    Match += ifPatternMatch(LoopHeader);
  } while (Match > 0);
  mergeLooplandBlock(LoopHeader, ExitBlk);
  MachineLoop *ParentLoop = LoopRep->getParentLoop();
  if (ParentLoop)
    MLI->changeLoopFor(LoopHeader, ParentLoop);
  else
    MLI->removeBlock(LoopHeader);
  Visited[LoopRep] = true;
  return 1;
}

bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { 1097 if (Src1MBB->succ_size() == 0) { 1098 MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); 1099 if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { 1100 MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; 1101 if (TheEntry) { 1102 DEBUG( 1103 dbgs() << "isLoopContBreakBlock yes src1 = BB" 1104 << Src1MBB->getNumber() 1105 << " src2 = BB" << Src2MBB->getNumber() << "\n"; 1106 ); 1107 return true; 1108 } 1109 } 1110 } 1111 return false; 1112 } 1113 1114 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, 1115 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1116 int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); 1117 if (Num == 0) { 1118 DEBUG( 1119 dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; 1120 ); 1121 Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); 1122 } 1123 return Num; 1124 } 1125 1126 int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 1127 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1128 int Num = 0; 1129 MachineBasicBlock *DownBlk; 1130 1131 //trueBlk could be the common post dominator 1132 DownBlk = TrueMBB; 1133 1134 DEBUG( 1135 dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() 1136 << " true = BB" << TrueMBB->getNumber() 1137 << ", numSucc=" << TrueMBB->succ_size() 1138 << " false = BB" << FalseMBB->getNumber() << "\n"; 1139 ); 1140 1141 while (DownBlk) { 1142 DEBUG( 1143 dbgs() << "check down = BB" << DownBlk->getNumber(); 1144 ); 1145 1146 if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { 1147 DEBUG( 1148 dbgs() << " working\n"; 1149 ); 1150 1151 Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); 1152 Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); 1153 1154 numClonedBlock += Num; 1155 Num += serialPatternMatch(*HeadMBB->succ_begin()); 1156 Num += serialPatternMatch(*std::next(HeadMBB->succ_begin())); 1157 Num += ifPatternMatch(HeadMBB); 1158 assert(Num > 
0); 1159 1160 break; 1161 } 1162 DEBUG( 1163 dbgs() << " not working\n"; 1164 ); 1165 DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; 1166 } // walk down the postDomTree 1167 1168 return Num; 1169 } 1170 1171 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf( 1172 MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, 1173 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { 1174 dbgs() << "head = BB" << HeadMBB->getNumber() 1175 << " size = " << HeadMBB->size(); 1176 if (Detail) { 1177 dbgs() << "\n"; 1178 HeadMBB->print(dbgs()); 1179 dbgs() << "\n"; 1180 } 1181 1182 if (TrueMBB) { 1183 dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " 1184 << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); 1185 if (Detail) { 1186 dbgs() << "\n"; 1187 TrueMBB->print(dbgs()); 1188 dbgs() << "\n"; 1189 } 1190 } 1191 if (FalseMBB) { 1192 dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " 1193 << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); 1194 if (Detail) { 1195 dbgs() << "\n"; 1196 FalseMBB->print(dbgs()); 1197 dbgs() << "\n"; 1198 } 1199 } 1200 if (LandMBB) { 1201 dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " 1202 << LandMBB->size() << " numPred = " << LandMBB->pred_size(); 1203 if (Detail) { 1204 dbgs() << "\n"; 1205 LandMBB->print(dbgs()); 1206 dbgs() << "\n"; 1207 } 1208 } 1209 1210 dbgs() << "\n"; 1211 } 1212 1213 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 1214 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 1215 MachineBasicBlock **LandMBBPtr) { 1216 bool MigrateTrue = false; 1217 bool MigrateFalse = false; 1218 1219 MachineBasicBlock *LandBlk = *LandMBBPtr; 1220 1221 assert((!TrueMBB || TrueMBB->succ_size() <= 1) 1222 && (!FalseMBB || FalseMBB->succ_size() <= 1)); 1223 1224 if (TrueMBB == FalseMBB) 1225 return 0; 1226 1227 MigrateTrue = needMigrateBlock(TrueMBB); 1228 MigrateFalse = needMigrateBlock(FalseMBB); 
1229 1230 if (!MigrateTrue && !MigrateFalse) 1231 return 0; 1232 1233 // If we need to migrate either trueBlk and falseBlk, migrate the rest that 1234 // have more than one predecessors. without doing this, its predecessor 1235 // rather than headBlk will have undefined value in initReg. 1236 if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) 1237 MigrateTrue = true; 1238 if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) 1239 MigrateFalse = true; 1240 1241 DEBUG( 1242 dbgs() << "before improveSimpleJumpintoIf: "; 1243 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); 1244 ); 1245 1246 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk 1247 // 1248 // new: headBlk => if () {initReg = 1; org trueBlk branch} else 1249 // {initReg = 0; org falseBlk branch } 1250 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} 1251 // => org landBlk 1252 // if landBlk->pred_size() > 2, put the about if-else inside 1253 // if (initReg !=2) {...} 1254 // 1255 // add initReg = initVal to headBlk 1256 1257 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1258 if (!MigrateTrue || !MigrateFalse) { 1259 // XXX: We have an opportunity here to optimize the "branch into if" case 1260 // here. Branch into if looks like this: 1261 // entry 1262 // / | 1263 // diamond_head branch_from 1264 // / \ | 1265 // diamond_false diamond_true 1266 // \ / 1267 // done 1268 // 1269 // The diamond_head block begins the "if" and the diamond_true block 1270 // is the block being "branched into". 1271 // 1272 // If MigrateTrue is true, then TrueBB is the block being "branched into" 1273 // and if MigrateFalse is true, then FalseBB is the block being 1274 // "branched into" 1275 // 1276 // Here is the pseudo code for how I think the optimization should work: 1277 // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. 1278 // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. 1279 // 3. 
Move the branch instruction from diamond_head into its own basic 1280 // block (new_block). 1281 // 4. Add an unconditional branch from diamond_head to new_block 1282 // 5. Replace the branch instruction in branch_from with an unconditional 1283 // branch to new_block. If branch_from has multiple predecessors, then 1284 // we need to replace the True/False block in the branch 1285 // instruction instead of replacing it. 1286 // 6. Change the condition of the branch instruction in new_block from 1287 // COND to (COND || GPR0) 1288 // 1289 // In order insert these MOV instruction, we will need to use the 1290 // RegisterScavenger. Usually liveness stops being tracked during 1291 // the late machine optimization passes, however if we implement 1292 // bool TargetRegisterInfo::requiresRegisterScavenging( 1293 // const MachineFunction &MF) 1294 // and have it return true, liveness will be tracked correctly 1295 // by generic optimization passes. We will also need to make sure that 1296 // all of our target-specific passes that run after regalloc and before 1297 // the CFGStructurizer track liveness and we will need to modify this pass 1298 // to correctly track liveness. 1299 // 1300 // After the above changes, the new CFG should look like this: 1301 // entry 1302 // / | 1303 // diamond_head branch_from 1304 // \ / 1305 // new_block 1306 // / | 1307 // diamond_false diamond_true 1308 // \ / 1309 // done 1310 // 1311 // Without this optimization, we are forced to duplicate the diamond_true 1312 // block and we will end up with a CFG like this: 1313 // 1314 // entry 1315 // / | 1316 // diamond_head branch_from 1317 // / \ | 1318 // diamond_false diamond_true diamond_true (duplicate) 1319 // \ / | 1320 // done --------------------| 1321 // 1322 // Duplicating diamond_true can be very costly especially if it has a 1323 // lot of instructions. 
1324 return 0; 1325 } 1326 1327 int NumNewBlk = 0; 1328 1329 bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); 1330 1331 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" 1332 MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF); 1333 1334 if (LandBlkHasOtherPred) { 1335 report_fatal_error("Extra register needed to handle CFG"); 1336 unsigned CmpResReg = 1337 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1338 report_fatal_error("Extra compare instruction needed to handle CFG"); 1339 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, 1340 CmpResReg, DebugLoc()); 1341 } 1342 1343 // XXX: We are running this after RA, so creating virtual registers will 1344 // cause an assertion failure in the PostRA scheduling pass. 1345 unsigned InitReg = 1346 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1347 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg, 1348 DebugLoc()); 1349 1350 if (MigrateTrue) { 1351 migrateInstruction(TrueMBB, LandBlk, I); 1352 // need to uncondionally insert the assignment to ensure a path from its 1353 // predecessor rather than headBlk has valid value in initReg if 1354 // (initVal != 1). 
1355 report_fatal_error("Extra register needed to handle CFG"); 1356 } 1357 insertInstrBefore(I, AMDGPU::ELSE); 1358 1359 if (MigrateFalse) { 1360 migrateInstruction(FalseMBB, LandBlk, I); 1361 // need to uncondionally insert the assignment to ensure a path from its 1362 // predecessor rather than headBlk has valid value in initReg if 1363 // (initVal != 0) 1364 report_fatal_error("Extra register needed to handle CFG"); 1365 } 1366 1367 if (LandBlkHasOtherPred) { 1368 // add endif 1369 insertInstrBefore(I, AMDGPU::ENDIF); 1370 1371 // put initReg = 2 to other predecessors of landBlk 1372 for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), 1373 PE = LandBlk->pred_end(); PI != PE; ++PI) { 1374 MachineBasicBlock *MBB = *PI; 1375 if (MBB != TrueMBB && MBB != FalseMBB) 1376 report_fatal_error("Extra register needed to handle CFG"); 1377 } 1378 } 1379 DEBUG( 1380 dbgs() << "result from improveSimpleJumpintoIf: "; 1381 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); 1382 ); 1383 1384 // update landBlk 1385 *LandMBBPtr = LandBlk; 1386 1387 return NumNewBlk; 1388 } 1389 1390 void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, 1391 MachineBasicBlock *SrcMBB) { 1392 DEBUG( 1393 dbgs() << "serialPattern BB" << DstMBB->getNumber() 1394 << " <= BB" << SrcMBB->getNumber() << "\n"; 1395 ); 1396 DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); 1397 1398 DstMBB->removeSuccessor(SrcMBB, true); 1399 cloneSuccessorList(DstMBB, SrcMBB); 1400 1401 removeSuccessor(SrcMBB); 1402 MLI->removeBlock(SrcMBB); 1403 retireBlock(SrcMBB); 1404 } 1405 1406 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, 1407 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 1408 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { 1409 assert (TrueMBB); 1410 DEBUG( 1411 dbgs() << "ifPattern BB" << MBB->getNumber(); 1412 dbgs() << "{ "; 1413 if (TrueMBB) { 1414 dbgs() << "BB" << TrueMBB->getNumber(); 1415 } 
1416 dbgs() << " } else "; 1417 dbgs() << "{ "; 1418 if (FalseMBB) { 1419 dbgs() << "BB" << FalseMBB->getNumber(); 1420 } 1421 dbgs() << " }\n "; 1422 dbgs() << "landBlock: "; 1423 if (!LandMBB) { 1424 dbgs() << "NULL"; 1425 } else { 1426 dbgs() << "BB" << LandMBB->getNumber(); 1427 } 1428 dbgs() << "\n"; 1429 ); 1430 1431 int OldOpcode = BranchMI->getOpcode(); 1432 DebugLoc BranchDL = BranchMI->getDebugLoc(); 1433 1434 // transform to 1435 // if cond 1436 // trueBlk 1437 // else 1438 // falseBlk 1439 // endif 1440 // landBlk 1441 1442 MachineBasicBlock::iterator I = BranchMI; 1443 insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), 1444 BranchDL); 1445 1446 if (TrueMBB) { 1447 MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); 1448 MBB->removeSuccessor(TrueMBB, true); 1449 if (LandMBB && TrueMBB->succ_size()!=0) 1450 TrueMBB->removeSuccessor(LandMBB, true); 1451 retireBlock(TrueMBB); 1452 MLI->removeBlock(TrueMBB); 1453 } 1454 1455 if (FalseMBB) { 1456 insertInstrBefore(I, AMDGPU::ELSE); 1457 MBB->splice(I, FalseMBB, FalseMBB->begin(), 1458 FalseMBB->end()); 1459 MBB->removeSuccessor(FalseMBB, true); 1460 if (LandMBB && FalseMBB->succ_size() != 0) 1461 FalseMBB->removeSuccessor(LandMBB, true); 1462 retireBlock(FalseMBB); 1463 MLI->removeBlock(FalseMBB); 1464 } 1465 insertInstrBefore(I, AMDGPU::ENDIF); 1466 1467 BranchMI->eraseFromParent(); 1468 1469 if (LandMBB && TrueMBB && FalseMBB) 1470 MBB->addSuccessor(LandMBB); 1471 } 1472 1473 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, 1474 MachineBasicBlock *LandMBB) { 1475 DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() 1476 << " land = BB" << LandMBB->getNumber() << "\n";); 1477 1478 insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); 1479 insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); 1480 DstBlk->replaceSuccessor(DstBlk, LandMBB); 1481 } 1482 1483 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 1484 
MachineBasicBlock *LandMBB) { 1485 DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber() 1486 << " land = BB" << LandMBB->getNumber() << "\n";); 1487 MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); 1488 assert(BranchMI && isCondBranch(BranchMI)); 1489 DebugLoc DL = BranchMI->getDebugLoc(); 1490 MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); 1491 MachineBasicBlock::iterator I = BranchMI; 1492 if (TrueBranch != LandMBB) 1493 reversePredicateSetter(I, *I->getParent()); 1494 insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL); 1495 insertInstrBefore(I, AMDGPU::BREAK); 1496 insertInstrBefore(I, AMDGPU::ENDIF); 1497 //now branchInst can be erase safely 1498 BranchMI->eraseFromParent(); 1499 //now take care of successors, retire blocks 1500 ExitingMBB->removeSuccessor(LandMBB, true); 1501 } 1502 1503 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, 1504 MachineBasicBlock *ContMBB) { 1505 DEBUG(dbgs() << "settleLoopcontBlock conting = BB" 1506 << ContingMBB->getNumber() 1507 << ", cont = BB" << ContMBB->getNumber() << "\n";); 1508 1509 MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); 1510 if (MI) { 1511 assert(isCondBranch(MI)); 1512 MachineBasicBlock::iterator I = MI; 1513 MachineBasicBlock *TrueBranch = getTrueBranch(MI); 1514 int OldOpcode = MI->getOpcode(); 1515 DebugLoc DL = MI->getDebugLoc(); 1516 1517 bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); 1518 1519 if (!UseContinueLogical) { 1520 int BranchOpcode = 1521 TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : 1522 getBranchZeroOpcode(OldOpcode); 1523 insertCondBranchBefore(I, BranchOpcode, DL); 1524 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1525 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL); 1526 insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL); 1527 } else { 1528 int BranchOpcode = 1529 TrueBranch == ContMBB ? 
getContinueNzeroOpcode(OldOpcode) : 1530 getContinueZeroOpcode(OldOpcode); 1531 insertCondBranchBefore(I, BranchOpcode, DL); 1532 } 1533 1534 MI->eraseFromParent(); 1535 } else { 1536 // if we've arrived here then we've already erased the branch instruction 1537 // travel back up the basic block to see the last reference of our debug 1538 // location we've just inserted that reference here so it should be 1539 // representative insertEnd to ensure phi-moves, if exist, go before the 1540 // continue-instr. 1541 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, 1542 getLastDebugLocInBB(ContingMBB)); 1543 } 1544 } 1545 1546 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 1547 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { 1548 int Cloned = 0; 1549 assert(PreMBB->isSuccessor(SrcMBB)); 1550 while (SrcMBB && SrcMBB != DstMBB) { 1551 assert(SrcMBB->succ_size() == 1); 1552 if (SrcMBB->pred_size() > 1) { 1553 SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); 1554 ++Cloned; 1555 } 1556 1557 PreMBB = SrcMBB; 1558 SrcMBB = *SrcMBB->succ_begin(); 1559 } 1560 1561 return Cloned; 1562 } 1563 1564 MachineBasicBlock * 1565 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, 1566 MachineBasicBlock *PredMBB) { 1567 assert(PredMBB->isSuccessor(MBB) && 1568 "succBlk is not a prececessor of curBlk"); 1569 1570 MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions 1571 replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); 1572 //srcBlk, oldBlk, newBlk 1573 1574 PredMBB->replaceSuccessor(MBB, CloneMBB); 1575 1576 // add all successor to cloneBlk 1577 cloneSuccessorList(CloneMBB, MBB); 1578 1579 numClonedInstr += MBB->size(); 1580 1581 DEBUG( 1582 dbgs() << "Cloned block: " << "BB" 1583 << MBB->getNumber() << "size " << MBB->size() << "\n"; 1584 ); 1585 1586 SHOWNEWBLK(CloneMBB, "result of Cloned block: "); 1587 1588 return CloneMBB; 1589 } 1590 1591 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, 1592 
MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { 1593 MachineBasicBlock::iterator SpliceEnd; 1594 //look for the input branchinstr, not the AMDGPU branchinstr 1595 MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); 1596 if (!BranchMI) { 1597 DEBUG( 1598 dbgs() << "migrateInstruction don't see branch instr\n" ; 1599 ); 1600 SpliceEnd = SrcMBB->end(); 1601 } else { 1602 DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI); 1603 SpliceEnd = BranchMI; 1604 } 1605 DEBUG( 1606 dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size() 1607 << "srcSize = " << SrcMBB->size() << "\n"; 1608 ); 1609 1610 //splice insert before insertPos 1611 DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); 1612 1613 DEBUG( 1614 dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size() 1615 << "srcSize = " << SrcMBB->size() << '\n'; 1616 ); 1617 } 1618 1619 MachineBasicBlock * 1620 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { 1621 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1622 MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); 1623 1624 if (!LoopHeader || !LoopLatch) 1625 return nullptr; 1626 MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); 1627 // Is LoopRep an infinite loop ? 
1628 if (!BranchMI || !isUncondBranch(BranchMI)) 1629 return nullptr; 1630 1631 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1632 FuncRep->push_back(DummyExitBlk); //insert to function 1633 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); 1634 DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); 1635 LLVMContext &Ctx = LoopHeader->getParent()->getFunction()->getContext(); 1636 Ctx.emitError("Extra register needed to handle CFG"); 1637 return nullptr; 1638 } 1639 1640 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { 1641 MachineInstr *BranchMI; 1642 1643 // I saw two unconditional branch in one basic block in example 1644 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 1645 while ((BranchMI = getLoopendBlockBranchInstr(MBB)) 1646 && isUncondBranch(BranchMI)) { 1647 DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); 1648 BranchMI->eraseFromParent(); 1649 } 1650 } 1651 1652 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch( 1653 MachineBasicBlock *MBB) { 1654 if (MBB->succ_size() != 2) 1655 return; 1656 MachineBasicBlock *MBB1 = *MBB->succ_begin(); 1657 MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin()); 1658 if (MBB1 != MBB2) 1659 return; 1660 1661 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 1662 assert(BranchMI && isCondBranch(BranchMI)); 1663 DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); 1664 BranchMI->eraseFromParent(); 1665 SHOWNEWBLK(MBB1, "Removing redundant successor"); 1666 MBB->removeSuccessor(MBB1, true); 1667 } 1668 1669 void AMDGPUCFGStructurizer::addDummyExitBlock( 1670 SmallVectorImpl<MachineBasicBlock*> &RetMBB) { 1671 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1672 FuncRep->push_back(DummyExitBlk); //insert to function 1673 insertInstrEnd(DummyExitBlk, AMDGPU::RETURN); 1674 1675 for (SmallVectorImpl<MachineBasicBlock *>::iterator It = 
RetMBB.begin(), 1676 E = RetMBB.end(); It != E; ++It) { 1677 MachineBasicBlock *MBB = *It; 1678 MachineInstr *MI = getReturnInstr(MBB); 1679 if (MI) 1680 MI->eraseFromParent(); 1681 MBB->addSuccessor(DummyExitBlk); 1682 DEBUG( 1683 dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() 1684 << " successors\n"; 1685 ); 1686 } 1687 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); 1688 } 1689 1690 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { 1691 while (MBB->succ_size()) 1692 MBB->removeSuccessor(*MBB->succ_begin()); 1693 } 1694 1695 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, 1696 int SccNum) { 1697 BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; 1698 if (!srcBlkInfo) 1699 srcBlkInfo = new BlockInformation(); 1700 srcBlkInfo->SccNum = SccNum; 1701 } 1702 1703 void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { 1704 DEBUG( 1705 dbgs() << "Retiring BB" << MBB->getNumber() << "\n"; 1706 ); 1707 1708 BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; 1709 1710 if (!SrcBlkInfo) 1711 SrcBlkInfo = new BlockInformation(); 1712 1713 SrcBlkInfo->IsRetired = true; 1714 assert(MBB->succ_size() == 0 && MBB->pred_size() == 0 1715 && "can't retire block yet"); 1716 } 1717 1718 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", 1719 "AMDGPU CFG Structurizer", false, false) 1720 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 1721 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) 1722 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 1723 INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer", 1724 "AMDGPU CFG Structurizer", false, false) 1725 1726 FunctionPass *llvm::createAMDGPUCFGStructurizerPass() { 1727 return new AMDGPUCFGStructurizer(); 1728 } 1729