1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 /// \file 9 //==-----------------------------------------------------------------------===// 10 11 #include "AMDGPU.h" 12 #include "AMDGPUInstrInfo.h" 13 #include "AMDGPUSubtarget.h" 14 #include "R600InstrInfo.h" 15 #include "llvm/ADT/DepthFirstIterator.h" 16 #include "llvm/ADT/SCCIterator.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/CodeGen/MachineDominators.h" 20 #include "llvm/CodeGen/MachineFunction.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineJumpTableInfo.h" 24 #include "llvm/CodeGen/MachineLoopInfo.h" 25 #include "llvm/CodeGen/MachinePostDominators.h" 26 #include "llvm/CodeGen/MachineRegisterInfo.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/Support/Debug.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include "llvm/Target/TargetInstrInfo.h" 31 #include "llvm/Target/TargetMachine.h" 32 #include <deque> 33 34 using namespace llvm; 35 36 #define DEBUG_TYPE "structcfg" 37 38 #define DEFAULT_VEC_SLOTS 8 39 40 // TODO: move-begin. 41 42 //===----------------------------------------------------------------------===// 43 // 44 // Statistics for CFGStructurizer. 
//
//===----------------------------------------------------------------------===//

// Counters reported by -stats; incremented by the pattern matchers below.
STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern "
                                 "matched");
STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern "
                             "matched");
STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks");
STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions");

namespace llvm {
// Forward declaration of the pass initializer; the definition is generated by
// the INITIALIZE_PASS machinery and called from the pass constructor.
void initializeAMDGPUCFGStructurizerPass(PassRegistry&);
}

//===----------------------------------------------------------------------===//
//
// Miscellaneous utility for CFGStructurizer.
//
//===----------------------------------------------------------------------===//
namespace {
// Debug-only helpers: print a newly created instruction / block. These expand
// to nothing in release builds (DEBUG compiles away).
#define SHOWNEWINSTR(i) \
  DEBUG(dbgs() << "New instr: " << *i << "\n");

#define SHOWNEWBLK(b, msg) \
  DEBUG( \
    dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
    dbgs() << "\n"; \
  );

#define SHOWBLK_DETAIL(b, msg) \
  DEBUG( \
    if (b) { \
      dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
      b->print(dbgs()); \
      dbgs() << "\n"; \
    } \
  );

// Sentinel SCC number for blocks that were never recorded by orderBlocks().
#define INVALIDSCCNUM -1

/// Reverse \p Src in place by swapping elements from both ends toward the
/// middle. Used only under STRESSTEST to feed the algorithm a worst-case
/// block ordering.
template<class NodeT>
void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
  size_t sz = Src.size();
  for (size_t i = 0; i < sz/2; ++i) {
    NodeT *t = Src[i];
    Src[i] = Src[sz - i - 1];
    Src[sz - i - 1] = t;
  }
}

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//
// supporting data structure for CFGStructurizer
//
//===----------------------------------------------------------------------===//


namespace {

/// Per-MachineBasicBlock bookkeeping for the structurizer:
/// whether the block has been merged away ("retired") and which SCC of the
/// CFG it belongs to (INVALIDSCCNUM if never recorded, i.e. unreachable).
class BlockInformation {
public:
  bool IsRetired;
  int SccNum;
  BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
};

} // end anonymous namespace

//===----------------------------------------------------------------------===//
//
// CFGStructurizer
//
//===----------------------------------------------------------------------===//

namespace {

/// Machine-function pass that restructures an arbitrary (reducible) CFG into
/// the structured control flow (IF/ELSE/ENDIF, WHILELOOP/ENDLOOP, BREAK,
/// CONTINUE pseudo instructions) required by the R600 hardware. It repeatedly
/// matches serial, if, and loop patterns over the blocks in SCC order and
/// collapses them until a single block remains.
class AMDGPUCFGStructurizer : public MachineFunctionPass {
public:
  typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
  typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
  typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;

  /// Classification result of singlePathTo().
  enum PathToKind {
    Not_SinglePath = 0,
    SinglePath_InPath = 1,
    SinglePath_NotInPath = 2
  };

  static char ID;

  AMDGPUCFGStructurizer() :
      MachineFunctionPass(ID), TII(nullptr), TRI(nullptr) {
    initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry());
  }

  StringRef getPassName() const override {
    return "AMDGPU Control Flow Graph structurizer Pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<MachineDominatorTree>();
    AU.addRequired<MachinePostDominatorTree>();
    AU.addRequired<MachineLoopInfo>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  /// Perform the CFG structurization
  bool run();

  /// Perform the CFG preparation.
  /// This step will remove every unconditional/dead jump instruction and make
  /// sure all loops have an exit block.
  bool prepare();

  bool runOnMachineFunction(MachineFunction &MF) override {
    TII = MF.getSubtarget<R600Subtarget>().getInstrInfo();
    TRI = &TII->getRegisterInfo();
    DEBUG(MF.dump(););
    // Reset per-function state; BlockInfoMap/LLInfoMap are cleared at the end
    // of run().
    OrderedBlks.clear();
    Visited.clear();
    FuncRep = &MF;
    MLI = &getAnalysis<MachineLoopInfo>();
    DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
    MDT = &getAnalysis<MachineDominatorTree>();
    DEBUG(MDT->print(dbgs(), (const llvm::Module*)nullptr););
    PDT = &getAnalysis<MachinePostDominatorTree>();
    DEBUG(PDT->print(dbgs()););
    prepare();
    run();
    DEBUG(MF.dump(););
    return true;
  }

protected:
  MachineDominatorTree *MDT;
  MachinePostDominatorTree *PDT;
  MachineLoopInfo *MLI;
  const R600InstrInfo *TII;
  const R600RegisterInfo *TRI;

  // PRINT FUNCTIONS
  /// Print the ordered Blocks.
  void printOrderedBlocks() const {
    size_t i = 0;
    for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
        iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
      dbgs() << "BB" << (*iterBlk)->getNumber();
      dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
      // Line break every 10 entries to keep the dump readable.
      if (i != 0 && i % 10 == 0) {
        dbgs() << "\n";
      } else {
        dbgs() << " ";
      }
    }
  }

  /// Dump every top-level loop known to \p LoopInfo.
  static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
    for (MachineLoop::iterator iter = LoopInfo.begin(),
         iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
      (*iter)->print(dbgs(), 0);
    }
  }

  // UTILITY FUNCTIONS
  int getSCCNum(MachineBasicBlock *MBB) const;
  MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
  bool hasBackEdge(MachineBasicBlock *MBB) const;
  bool isRetiredBlock(MachineBasicBlock *MBB) const;
  bool isActiveLoophead(MachineBasicBlock *MBB) const;
  PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
      bool AllowSideEntry = true) const;
  int countActiveBlock(MBBVector::const_iterator It,
      MBBVector::const_iterator E) const;
  bool needMigrateBlock(MachineBasicBlock *MBB) const;

  // Utility Functions
  void reversePredicateSetter(MachineBasicBlock::iterator I,
                              MachineBasicBlock &MBB);
  /// Compute the reversed DFS post order of Blocks
  void orderBlocks(MachineFunction *MF);

  // Function originally from CFGStructTraits
  void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
                      const DebugLoc &DL = DebugLoc());
  MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
                                  const DebugLoc &DL = DebugLoc());
  MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I,
                                  int NewOpcode);
  void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
                              const DebugLoc &DL);
  void insertCondBranchBefore(MachineBasicBlock *MBB,
                              MachineBasicBlock::iterator I, int NewOpcode,
                              int RegNum, const DebugLoc &DL);
  // Map branch/continue opcodes to their structured counterparts.
  static int getBranchNzeroOpcode(int OldOpcode);
  static int getBranchZeroOpcode(int OldOpcode);
  static int getContinueNzeroOpcode(int OldOpcode);
  static int getContinueZeroOpcode(int OldOpcode);
  static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
  static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
  static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
      MachineInstr *MI);
  static bool isCondBranch(MachineInstr *MI);
  static bool isUncondBranch(MachineInstr *MI);
  static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
  static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
  /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
  ///
  /// BB with backward-edge could have move instructions after the branch
  /// instruction. Such move instructions "belong to" the loop backward-edge.
  MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
  static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
  static bool isReturnBlock(MachineBasicBlock *MBB);
  static void cloneSuccessorList(MachineBasicBlock *DstMBB,
      MachineBasicBlock *SrcMBB);
  static MachineBasicBlock *clone(MachineBasicBlock *MBB);
  /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
  /// because the AMDGPU instruction is not recognized as a terminator; fix
  /// that and retire this routine.
  void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
      MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
  static void wrapup(MachineBasicBlock *MBB);


  // Pattern matchers; each returns the number of patterns collapsed.
  int patternMatch(MachineBasicBlock *MBB);
  int patternMatchGroup(MachineBasicBlock *MBB);
  int serialPatternMatch(MachineBasicBlock *MBB);
  int ifPatternMatch(MachineBasicBlock *MBB);
  int loopendPatternMatch();
  int mergeLoop(MachineLoop *LoopRep);

  /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
  /// the same loop with LoopLandInfo without explicitly keeping track of
  /// loopContBlks and loopBreakBlks, this is a method to get the information.
  bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
      MachineBasicBlock *Src2MBB);
  int handleJumpintoIf(MachineBasicBlock *HeadMBB,
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
  int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
  int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
      MachineBasicBlock **LandMBBPtr);
  void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
      MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
      MachineBasicBlock *LandMBB, bool Detail = false);
  int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
      MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
  void mergeSerialBlock(MachineBasicBlock *DstMBB,
      MachineBasicBlock *SrcMBB);

  void mergeIfthenelseBlock(MachineInstr *BranchMI,
      MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
      MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
  void mergeLooplandBlock(MachineBasicBlock *DstMBB,
      MachineBasicBlock *LandMBB);
  void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
      MachineBasicBlock *LandMBB);
  void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
      MachineBasicBlock *ContMBB);
  /// normalizeInfiniteLoopExit change
  ///   B1:
  ///     uncond_br LoopHeader
  ///
  /// to
  ///   B1:
  ///     cond_br 1 LoopHeader dummyExit
  /// and return the newly added dummy exit block
  MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
  void removeUnconditionalBranch(MachineBasicBlock *MBB);
  /// Remove duplicate branches instructions in a block.
  /// For instance
  ///   B0:
  ///     cond_br X B1 B2
  ///     cond_br X B1 B2
  /// is transformed to
  ///   B0:
  ///     cond_br X B1 B2
  void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
  void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
  void removeSuccessor(MachineBasicBlock *MBB);
  MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
      MachineBasicBlock *PredMBB);
  void migrateInstruction(MachineBasicBlock *SrcMBB,
      MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
  void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
  void retireBlock(MachineBasicBlock *MBB);


private:
  // Owned BlockInformation pointers; freed at the end of run().
  MBBInfoMap BlockInfoMap;
  // Loop -> land (merge point) block, filled while merging loops.
  LoopLandInfoMap LLInfoMap;
  std::map<MachineLoop *, bool> Visited;
  MachineFunction *FuncRep;
  // Blocks in reversed-DFS post order, computed by orderBlocks().
  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
};

/// Return the SCC number recorded for \p MBB, or INVALIDSCCNUM if the block
/// was never recorded (i.e. it is unreachable from the entry).
int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
  if (It == BlockInfoMap.end())
    return INVALIDSCCNUM;
  return (*It).second->SccNum;
}

/// Return the land (merge) block recorded for \p LoopRep, or nullptr if the
/// loop has none recorded yet.
MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
    const {
  LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
  if (It == LLInfoMap.end())
    return nullptr;
  return (*It).second;
}

/// True iff \p MBB branches back to the header of its innermost loop.
bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
  if (!LoopRep)
    return false;
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
  return MBB->isSuccessor(LoopHeader);
}

/// True iff \p MBB has been merged into another block and removed from
/// further pattern matching.
bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
  MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
  if (It == BlockInfoMap.end())
    return false;
  return (*It).second->IsRetired;
}

/// True iff \p MBB is the header of a loop (at any nesting level) whose land
/// block has not been created or not yet retired — i.e. the loop still needs
/// to be structurized.
bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
  MachineLoop *LoopRep = MLI->getLoopFor(MBB);
  while (LoopRep && LoopRep->getHeader() == MBB) {
    MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
    if(!LoopLand)
      return true;
    if (!isRetiredBlock(LoopLand))
      return true;
    LoopRep = LoopRep->getParentLoop();
  }
  return false;
}

/// Walk the chain of single-successor blocks starting at \p SrcMBB and
/// classify the path relative to \p DstMBB:
///  - SinglePath_InPath: the chain reaches DstMBB;
///  - SinglePath_NotInPath: the chain ends at a block with no successors;
///  - Not_SinglePath: the chain branches, or (when side entries are
///    disallowed) some block on it has multiple predecessors.
AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
    MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
    bool AllowSideEntry) const {
  assert(DstMBB);
  if (SrcMBB == DstMBB)
    return SinglePath_InPath;
  while (SrcMBB && SrcMBB->succ_size() == 1) {
    SrcMBB = *SrcMBB->succ_begin();
    if (SrcMBB == DstMBB)
      return SinglePath_InPath;
    if (!AllowSideEntry && SrcMBB->pred_size() > 1)
      return Not_SinglePath;
  }
  if (SrcMBB && SrcMBB->succ_size()==0)
    return SinglePath_NotInPath;
  return Not_SinglePath;
}

/// Count the blocks in [It, E) that have not been retired.
int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
    MBBVector::const_iterator E) const {
  int Count = 0;
  while (It != E) {
    if (!isRetiredBlock(*It))
      ++Count;
    ++It;
  }
  return Count;
}

/// Heuristic: a multi-predecessor block is "migrated" (cloned per
/// predecessor) only when it is big enough that the clone cost matters —
/// more than 30 instructions and more than 100 cloned instructions total.
bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
  unsigned BlockSizeThreshold = 30;
  unsigned CloneInstrThreshold = 100;
  bool MultiplePreds = MBB && (MBB->pred_size() > 1);

  if(!MultiplePreds)
    return false;
  unsigned BlkSize = MBB->size();
  return ((BlkSize > BlockSizeThreshold) &&
      (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
}

/// Walk backwards from \p I to the nearest PRED_X instruction and invert its
/// predicate (SETE <-> SETNE, for both int and float variants).
/// NOTE(review): the loop assumes a PRED_X exists before \p I in \p MBB —
/// there is no begin() check, so it must always be present when called.
void AMDGPUCFGStructurizer::reversePredicateSetter(
    MachineBasicBlock::iterator I, MachineBasicBlock &MBB) {
  assert(I.isValid() && "Expected valid iterator");
  for (;; --I) {
    if (I == MBB.end())
      continue;
    if (I->getOpcode() == AMDGPU::PRED_X) {
      switch (I->getOperand(2).getImm()) {
      case AMDGPU::PRED_SETE_INT:
        I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT);
        return;
      case AMDGPU::PRED_SETNE_INT:
        I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT);
        return;
      case AMDGPU::PRED_SETE:
        I->getOperand(2).setImm(AMDGPU::PRED_SETNE);
        return;
      case AMDGPU::PRED_SETNE:
        I->getOperand(2).setImm(AMDGPU::PRED_SETE);
        return;
      default:
        llvm_unreachable("PRED_X Opcode invalid!");
      }
    }
  }
}

/// Append a new instruction with opcode \p NewOpcode at the end of \p MBB.
void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
    int NewOpcode, const DebugLoc &DL) {
  MachineInstr *MI =
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
  MBB->push_back(MI);
  //assume the instruction doesn't take any reg operand ...
  SHOWNEWINSTR(MI);
}

/// Insert a new instruction with opcode \p NewOpcode at the beginning of
/// \p MBB and return it.
MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
    int NewOpcode, const DebugLoc &DL) {
  MachineInstr *MI =
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
  if (MBB->begin() != MBB->end())
    MBB->insert(MBB->begin(), MI);
  else
    MBB->push_back(MI);
  SHOWNEWINSTR(MI);
  return MI;
}

/// Insert a new instruction with opcode \p NewOpcode immediately before the
/// instruction at \p I and return it.
MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
    MachineBasicBlock::iterator I, int NewOpcode) {
  MachineInstr *OldMI = &(*I);
  MachineBasicBlock *MBB = OldMI->getParent();
  // NOTE(review): despite the name, NewMBB is a MachineInstr, not a block.
  MachineInstr *NewMBB =
      MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
  MBB->insert(I, NewMBB);
  //assume the instruction doesn't take any reg operand ...
  SHOWNEWINSTR(NewMBB);
  return NewMBB;
}

/// Insert a conditional branch with opcode \p NewOpcode before \p I, reusing
/// the condition register from operand 1 of the instruction at \p I.
void AMDGPUCFGStructurizer::insertCondBranchBefore(
    MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) {
  MachineInstr *OldMI = &(*I);
  MachineBasicBlock *MBB = OldMI->getParent();
  MachineFunction *MF = MBB->getParent();
  MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
  MBB->insert(I, NewMI);
  MachineInstrBuilder MIB(*MF, NewMI);
  MIB.addReg(OldMI->getOperand(1).getReg(), false);
  SHOWNEWINSTR(NewMI);
  //erase later oldInstr->eraseFromParent();
}

/// Insert a conditional branch with opcode \p NewOpcode before \p I in
/// \p blk, using \p RegNum as the condition register.
void AMDGPUCFGStructurizer::insertCondBranchBefore(
    MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode,
    int RegNum, const DebugLoc &DL) {
  MachineFunction *MF = blk->getParent();
  MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
  //insert before
  blk->insert(I, NewInstr);
  MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
  SHOWNEWINSTR(NewInstr);
}

/// Map a branch opcode to the structured IF opcode taken when the condition
/// is non-zero.
int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
  case AMDGPU::BRANCH_COND_i32:
  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
  default: llvm_unreachable("internal error");
  }
  return -1;
}

/// Map a branch opcode to the structured IF opcode taken when the condition
/// is zero.
int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
  case AMDGPU::BRANCH_COND_i32:
  case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
  default: llvm_unreachable("internal error");
  }
  return -1;
}

/// Map a branch opcode to the structured CONTINUE opcode taken when the
/// condition is non-zero.
int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
  default: llvm_unreachable("internal error");
  };
  return -1;
}

/// Map a branch opcode to the structured CONTINUE opcode taken when the
/// condition is zero.
int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
  switch(OldOpcode) {
  case AMDGPU::JUMP_COND:
  case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
  default: llvm_unreachable("internal error");
  }
  return -1;
}

/// The taken target of a branch is its first (MBB) operand.
MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
  return MI->getOperand(0).getMBB();
}

void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
    MachineBasicBlock *MBB) {
  MI->getOperand(0).setMBB(MBB);
}

/// Given a block with exactly two successors and its branch instruction,
/// return the successor that is NOT the branch target.
MachineBasicBlock *
AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
    MachineInstr *MI) {
  assert(MBB->succ_size() == 2);
  MachineBasicBlock *TrueBranch = getTrueBranch(MI);
  MachineBasicBlock::succ_iterator It = MBB->succ_begin();
  MachineBasicBlock::succ_iterator Next = It;
  ++Next;
  return (*It == TrueBranch) ? *Next : *It;
}

/// True for the conditional-branch opcodes this pass understands.
bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
  switch (MI->getOpcode()) {
    case AMDGPU::JUMP_COND:
    case AMDGPU::BRANCH_COND_i32:
    case AMDGPU::BRANCH_COND_f32: return true;
  default:
    return false;
  }
  return false;
}

/// True for the unconditional-branch opcodes this pass understands.
bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
  switch (MI->getOpcode()) {
  case AMDGPU::JUMP:
  case AMDGPU::BRANCH:
    return true;
  default:
    return false;
  }
  return false;
}

/// Scan \p MBB front-to-back and return the debug location of the LAST
/// instruction that carries one (each hit overwrites DL), or an empty
/// DebugLoc if none does.
DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
  DebugLoc DL;
  for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
      ++It) {
    MachineInstr *instr = &(*It);
    if (instr->getDebugLoc())
      DL = instr->getDebugLoc();
  }
  return DL;
}

/// Return the last instruction of \p MBB if it is a (un)conditional branch,
/// otherwise nullptr.
/// NOTE(review): dereferences rbegin() without an empty-block check — callers
/// are expected to pass non-empty blocks.
MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
    MachineBasicBlock *MBB) {
  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
  MachineInstr *MI = &*It;
  if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
    return MI;
  return nullptr;
}

/// Like getNormalBlockBranchInstr, but also looks past trailing move
/// instructions, which on a backward-edge block may follow the branch.
MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
    MachineBasicBlock *MBB) {
  for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
      It != E; ++It) {
    // FIXME: Simplify
    MachineInstr *MI = &*It;
    if (MI) {
      if (isCondBranch(MI) || isUncondBranch(MI))
        return MI;
      else if (!TII->isMov(MI->getOpcode()))
        break;
    }
  }
  return nullptr;
}

/// Return the trailing RETURN instruction of \p MBB, or nullptr.
MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
  MachineBasicBlock::reverse_iterator It = MBB->rbegin();
  if (It != MBB->rend()) {
    MachineInstr *instr = &(*It);
    if (instr->getOpcode() == AMDGPU::RETURN)
      return instr;
  }
  return nullptr;
}

/// A return block is one with no successors; a RETURN instruction implies no
/// successors, but a successor-less block without RETURN is tolerated (and
/// reported under DEBUG).
bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
  MachineInstr *MI = getReturnInstr(MBB);
  bool IsReturn = (MBB->succ_size() == 0);
  if (MI)
    assert(IsReturn);
  else if (IsReturn)
    DEBUG(
      dbgs() << "BB" << MBB->getNumber()
             <<" is return block without RETURN instr\n";);
  return IsReturn;
}

/// Make every successor of \p SrcMBB also a successor of \p DstMBB.
void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
    MachineBasicBlock *SrcMBB) {
  for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
       iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
    DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of
}

/// Create a new block at the end of the function containing copies of every
/// instruction in \p MBB. Successor/predecessor lists are NOT copied here.
MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
  MachineFunction *Func = MBB->getParent();
  MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
  Func->push_back(NewMBB);  //insert to function
  for (const MachineInstr &It : *MBB)
    NewMBB->push_back(Func->CloneMachineInstr(&It));
  return NewMBB;
}

/// If the (possible) loop-end branch of \p SrcMBB targets \p OldMBB,
/// retarget it to \p NewBlk.
void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
    MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
    MachineBasicBlock *NewBlk) {
  MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
  if (BranchMI && isCondBranch(BranchMI) &&
      getTrueBranch(BranchMI) == OldMBB)
    setTrueBranch(BranchMI, NewBlk);
}

/// Final cleanup on the (single remaining) block: delete every CONTINUE that
/// immediately precedes an ENDLOOP, since falling through to ENDLOOP already
/// continues the loop.
void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
  assert((!MBB->getParent()->getJumpTableInfo()
          || MBB->getParent()->getJumpTableInfo()->isEmpty())
         && "found a jump table");

   //collect continue right before endloop
   SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
   MachineBasicBlock::iterator Pre = MBB->begin();
   MachineBasicBlock::iterator E = MBB->end();
   MachineBasicBlock::iterator It = Pre;
   while (It != E) {
     if (Pre->getOpcode() == AMDGPU::CONTINUE
         && It->getOpcode() == AMDGPU::ENDLOOP)
       ContInstr.push_back(&*Pre);
     Pre = It;
     ++It;
   }

   //delete continue right before endloop
   for (unsigned i = 0; i < ContInstr.size(); ++i)
      ContInstr[i]->eraseFromParent();

   // TODO to fix up jump table so later phase won't be confused. if
   // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
   // there isn't such an interface yet. alternatively, replace all the other
   // blocks in the jump table with the entryBlk //}

}

/// Pre-pass before run(): order the blocks, give infinite loops a dummy exit
/// block, strip unconditional and duplicate-conditional branches, and funnel
/// multiple return blocks into one dummy exit block.
bool AMDGPUCFGStructurizer::prepare() {
  bool Changed = false;

  //FIXME: if not reducible flow graph, make it so ???

  DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);

  orderBlocks(FuncRep);

  SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;

  // Add an ExitBlk to loop that don't have one
  for (MachineLoopInfo::iterator It = MLI->begin(),
       E = MLI->end(); It != E; ++It) {
    MachineLoop *LoopRep = (*It);
    MBBVector ExitingMBBs;
    LoopRep->getExitingBlocks(ExitingMBBs);

    if (ExitingMBBs.size() == 0) {
      MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
      if (DummyExitBlk)
        RetBlks.push_back(DummyExitBlk);
    }
  }

  // Remove unconditional branch instr.
  // Add dummy exit block iff there are multiple returns.
  for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
       It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
    MachineBasicBlock *MBB = *It;
    removeUnconditionalBranch(MBB);
    removeRedundantConditionalBranch(MBB);
    if (isReturnBlock(MBB)) {
      RetBlks.push_back(MBB);
    }
    assert(MBB->succ_size() <= 2);
  }

  if (RetBlks.size() >= 2) {
    addDummyExitBlock(RetBlks);
    Changed = true;
  }

  return Changed;
}

/// Main fixed-point driver: repeatedly sweep the ordered blocks, running the
/// pattern matchers per SCC, until the function collapses to a single block
/// or no progress is made (in which case the CFG is irreducible and we abort
/// with a fatal error).
bool AMDGPUCFGStructurizer::run() {

  //Assume reducible CFG...
  DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n");

#ifdef STRESSTEST
  //Use the worse block ordering to test the algorithm.
  ReverseVector(orderedBlks);
#endif

  DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
  int NumIter = 0;
  bool Finish = false;
  MachineBasicBlock *MBB;
  bool MakeProgress = false;
  int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
                                        OrderedBlks.end());

  do {
    ++NumIter;
    DEBUG(
      dbgs() << "numIter = " << NumIter
             << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
    );

    SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
        OrderedBlks.begin();
    SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
        OrderedBlks.end();

    SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
        It;
    MachineBasicBlock *SccBeginMBB = nullptr;
    int SccNumBlk = 0;  // The number of active blocks, init to a
                        // maximum possible number.
    int SccNumIter;  // Number of iteration in this SCC.

    while (It != E) {
      MBB = *It;

      if (!SccBeginMBB) {
        // Entering a new SCC: remember where it starts so we can re-iterate
        // over just this SCC while it keeps shrinking.
        SccBeginIter = It;
        SccBeginMBB = MBB;
        SccNumIter = 0;
        SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
        DEBUG(
          dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
          dbgs() << "\n";
        );
      }

      if (!isRetiredBlock(MBB))
        patternMatch(MBB);

      ++It;

      bool ContNextScc = true;
      if (It == E
          || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
        // Just finish one scc.
        ++SccNumIter;
        int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
        if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
          DEBUG(
            dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
                   << ", sccNumIter = " << SccNumIter;
            dbgs() << "doesn't make any progress\n";
          );
          ContNextScc = true;
        } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
          // The SCC shrank: rewind and keep reducing it.
          SccNumBlk = sccRemainedNumBlk;
          It = SccBeginIter;
          ContNextScc = false;
          DEBUG(
            dbgs() << "repeat processing SCC" << getSCCNum(MBB)
                   << "sccNumIter = " << SccNumIter << '\n';
          );
        } else {
          // Finish the current scc.
          ContNextScc = true;
        }
      } else {
        // Continue on next component in the current scc.
        ContNextScc = false;
      }

      if (ContNextScc)
        SccBeginMBB = nullptr;
    } //while, "one iteration" over the function.

    MachineBasicBlock *EntryMBB =
        *GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
    if (EntryMBB->succ_size() == 0) {
      Finish = true;
      DEBUG(
        dbgs() << "Reduce to one block\n";
      );
    } else {
      int NewnumRemainedBlk
        = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
      // consider cloned blocks ??
      if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
        MakeProgress = true;
        NumRemainedBlk = NewnumRemainedBlk;
      } else {
        MakeProgress = false;
        DEBUG(
          dbgs() << "No progress\n";
        );
      }
    }
  } while (!Finish && MakeProgress);

  // Misc wrap up to maintain the consistency of the Function representation.
  wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep));

  // Detach retired Block, release memory.
  for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
      It != E; ++It) {
    if ((*It).second && (*It).second->IsRetired) {
      assert(((*It).first)->getNumber() != -1);
      DEBUG(
        dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
      );
      (*It).first->eraseFromParent();  //Remove from the parent Function.
    }
    delete (*It).second;
  }
  BlockInfoMap.clear();
  LLInfoMap.clear();

  if (!Finish) {
    DEBUG(FuncRep->viewCFG());
    report_fatal_error("IRREDUCIBLE_CFG");
  }

  return true;
}



/// Fill OrderedBlks with the blocks of \p MF in SCC order (scc_begin visits
/// SCCs in reverse topological order) and record each block's SCC number.
/// Blocks not reached by the SCC iteration keep INVALIDSCCNUM.
void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
  int SccNum = 0;
  MachineBasicBlock *MBB;
  for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd();
       ++It, ++SccNum) {
    const std::vector<MachineBasicBlock *> &SccNext = *It;
    for (std::vector<MachineBasicBlock *>::const_iterator
         blockIter = SccNext.begin(), blockEnd = SccNext.end();
         blockIter != blockEnd; ++blockIter) {
      MBB = *blockIter;
      OrderedBlks.push_back(MBB);
      recordSccnum(MBB, SccNum);
    }
  }

  //walk through all the block in func to check for unreachable
  // NOTE(review): this message is not wrapped in DEBUG, so it prints even in
  // release builds.
  typedef GraphTraits<MachineFunction *> GTM;
  auto It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
  for (; It != E; ++It) {
    MachineBasicBlock *MBB = *It;
    SccNum = getSCCNum(MBB);
    if (SccNum == INVALIDSCCNUM)
      dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
  }
}

/// Run patternMatchGroup on \p MBB until it stops matching; return the total
/// number of patterns collapsed.
int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
  int NumMatch = 0;
  int CurMatch;

  DEBUG(
        dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
  );

  while ((CurMatch = patternMatchGroup(MBB)) > 0)
    NumMatch += CurMatch;

  DEBUG(
        dbgs() << "End patternMatch BB" << MBB->getNumber()
      << ", numMatch = " << NumMatch << "\n";
  );

  return NumMatch;
}

/// One round of matching: loop-ends first (innermost loops are handled by
/// loopendPatternMatch's ordering), then serial, then if patterns.
int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
  int NumMatch = 0;
  NumMatch += loopendPatternMatch();
  NumMatch += serialPatternMatch(MBB);
  NumMatch += ifPatternMatch(MBB);
  return NumMatch;
}


/// Merge \p MBB with its unique successor when that successor has \p MBB as
/// its only predecessor and is not an active loop header.
int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
  if (MBB->succ_size() != 1)
    return 0;

  MachineBasicBlock *childBlk = *MBB->succ_begin();
  if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
    return 0;

  mergeSerialBlock(MBB, childBlk);
  ++numSerialPatternMatch;
  return 1;
}

/// Match diamond / triangle if-patterns rooted at \p MBB (which must end in a
/// conditional branch and not be a loop back-edge source), recursively
/// reducing the true/false arms first, cloning multi-predecessor arms as
/// needed, and merging the result into a structured if-then-else.
int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
  //two edges
  if (MBB->succ_size() != 2)
    return 0;
  if (hasBackEdge(MBB))
    return 0;
  MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
  if (!BranchMI)
    return 0;

  assert(isCondBranch(BranchMI));
  int NumMatch = 0;

  // Reduce each arm as far as possible before classifying the shape.
  MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
  NumMatch += serialPatternMatch(TrueMBB);
  NumMatch += ifPatternMatch(TrueMBB);
  MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
  NumMatch += serialPatternMatch(FalseMBB);
  NumMatch += ifPatternMatch(FalseMBB);
  MachineBasicBlock *LandBlk;
  int Cloned = 0;

  assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
  // TODO: Simplify
  if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
    && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
    // Diamond pattern
    LandBlk = *TrueMBB->succ_begin();
  } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
    // Triangle pattern, false is empty
    LandBlk = FalseMBB;
    FalseMBB = nullptr;
  } else if (FalseMBB->succ_size() == 1
             && *FalseMBB->succ_begin() == TrueMBB) {
    // Triangle pattern, true is empty
    // We reverse the predicate to make a triangle, empty false pattern;
    std::swap(TrueMBB, FalseMBB);
    reversePredicateSetter(MBB->end(), *MBB);
    LandBlk = FalseMBB;
    FalseMBB = nullptr;
  } else if (FalseMBB->succ_size() == 1
             && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
    LandBlk = *FalseMBB->succ_begin();
  } else if (TrueMBB->succ_size() == 1
    && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
    LandBlk = *TrueMBB->succ_begin();
  } else {
    return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB);
  }

  // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
  // new BB created for landBlk==NULL may introduce new challenge to the
  // reduction process.
  if (LandBlk &&
      ((TrueMBB && TrueMBB->pred_size() > 1)
      || (FalseMBB && FalseMBB->pred_size() > 1))) {
     Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
  }

  if (TrueMBB && TrueMBB->pred_size() > 1) {
    TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
    ++Cloned;
  }

  if (FalseMBB && FalseMBB->pred_size() > 1) {
    FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
    ++Cloned;
  }

  mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);

  ++numIfPatternMatch;

  numClonedBlock += Cloned;

  return 1 + Cloned + NumMatch;
}

/// Merge every loop, outermost first (loops are push_front'ed while walking
/// depth-first), so a "continue" to an outer loop is not mistaken for a
/// "break" of the current loop. Returns the number of breaks merged.
int AMDGPUCFGStructurizer::loopendPatternMatch() {
  std::deque<MachineLoop *> NestedLoops;
  for (auto &It: *MLI)
    for (MachineLoop *ML : depth_first(It))
      NestedLoops.push_front(ML);

  if (NestedLoops.size() == 0)
    return 0;

  // Process nested loop outside->inside (we did push_front),
  // so "continue" to a outside loop won't be mistaken as "break"
  // of the current loop.
  int Num = 0;
  for (MachineLoop *ExaminedLoop : NestedLoops) {
    if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
      continue;
    DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
    int NumBreak = mergeLoop(ExaminedLoop);
    if (NumBreak == -1)
      break;
    Num += NumBreak;
  }
  return Num;
}

/// Structurize one loop: turn exiting edges into LOOP_BREAK, latch edges into
/// CONTINUE, reduce the header, then wrap header+exit into a loop-land block.
/// Assumes the loop has exactly one exit block (asserted below).
int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
  MachineBasicBlock *LoopHeader = LoopRep->getHeader();
  MBBVector ExitingMBBs;
  LoopRep->getExitingBlocks(ExitingMBBs);
  assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
  DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
  // We assume a single ExitBlk
  MBBVector ExitBlks;
  LoopRep->getExitBlocks(ExitBlks);
  SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
  for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
    ExitBlkSet.insert(ExitBlks[i]);
  assert(ExitBlkSet.size() == 1);
  MachineBasicBlock *ExitBlk = *ExitBlks.begin();
  assert(ExitBlk && "Loop has several exit block");
  MBBVector LatchBlks;
  typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
  InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
      PE = InvMBBTraits::child_end(LoopHeader);
  for (; PI != PE; PI++) {
    if (LoopRep->contains(*PI))
      LatchBlks.push_back(*PI);
  }

  for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
    mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
  for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
    settleLoopcontBlock(LatchBlks[i], LoopHeader);
  int Match = 0;
  do {
    Match = 0;
    Match += serialPatternMatch(LoopHeader);
    Match += ifPatternMatch(LoopHeader);
  } while (Match > 0);
  mergeLooplandBlock(LoopHeader, ExitBlk);
  MachineLoop *ParentLoop = LoopRep->getParentLoop();
  if (ParentLoop)
    MLI->changeLoopFor(LoopHeader, ParentLoop);
  else
MLI->removeBlock(LoopHeader); 1101 Visited[LoopRep] = true; 1102 return 1; 1103 } 1104 1105 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak( 1106 MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { 1107 if (Src1MBB->succ_size() == 0) { 1108 MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); 1109 if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { 1110 MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; 1111 if (TheEntry) { 1112 DEBUG( 1113 dbgs() << "isLoopContBreakBlock yes src1 = BB" 1114 << Src1MBB->getNumber() 1115 << " src2 = BB" << Src2MBB->getNumber() << "\n"; 1116 ); 1117 return true; 1118 } 1119 } 1120 } 1121 return false; 1122 } 1123 1124 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, 1125 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1126 int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); 1127 if (Num == 0) { 1128 DEBUG( 1129 dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; 1130 ); 1131 Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); 1132 } 1133 return Num; 1134 } 1135 1136 int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 1137 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1138 int Num = 0; 1139 MachineBasicBlock *DownBlk; 1140 1141 //trueBlk could be the common post dominator 1142 DownBlk = TrueMBB; 1143 1144 DEBUG( 1145 dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() 1146 << " true = BB" << TrueMBB->getNumber() 1147 << ", numSucc=" << TrueMBB->succ_size() 1148 << " false = BB" << FalseMBB->getNumber() << "\n"; 1149 ); 1150 1151 while (DownBlk) { 1152 DEBUG( 1153 dbgs() << "check down = BB" << DownBlk->getNumber(); 1154 ); 1155 1156 if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { 1157 DEBUG( 1158 dbgs() << " working\n"; 1159 ); 1160 1161 Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); 1162 Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); 1163 1164 numClonedBlock += Num; 1165 Num += 
serialPatternMatch(*HeadMBB->succ_begin()); 1166 Num += serialPatternMatch(*std::next(HeadMBB->succ_begin())); 1167 Num += ifPatternMatch(HeadMBB); 1168 assert(Num > 0); 1169 1170 break; 1171 } 1172 DEBUG( 1173 dbgs() << " not working\n"; 1174 ); 1175 DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; 1176 } // walk down the postDomTree 1177 1178 return Num; 1179 } 1180 1181 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf( 1182 MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, 1183 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { 1184 dbgs() << "head = BB" << HeadMBB->getNumber() 1185 << " size = " << HeadMBB->size(); 1186 if (Detail) { 1187 dbgs() << "\n"; 1188 HeadMBB->print(dbgs()); 1189 dbgs() << "\n"; 1190 } 1191 1192 if (TrueMBB) { 1193 dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " 1194 << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); 1195 if (Detail) { 1196 dbgs() << "\n"; 1197 TrueMBB->print(dbgs()); 1198 dbgs() << "\n"; 1199 } 1200 } 1201 if (FalseMBB) { 1202 dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " 1203 << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); 1204 if (Detail) { 1205 dbgs() << "\n"; 1206 FalseMBB->print(dbgs()); 1207 dbgs() << "\n"; 1208 } 1209 } 1210 if (LandMBB) { 1211 dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " 1212 << LandMBB->size() << " numPred = " << LandMBB->pred_size(); 1213 if (Detail) { 1214 dbgs() << "\n"; 1215 LandMBB->print(dbgs()); 1216 dbgs() << "\n"; 1217 } 1218 } 1219 1220 dbgs() << "\n"; 1221 } 1222 1223 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 1224 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 1225 MachineBasicBlock **LandMBBPtr) { 1226 bool MigrateTrue = false; 1227 bool MigrateFalse = false; 1228 1229 MachineBasicBlock *LandBlk = *LandMBBPtr; 1230 1231 assert((!TrueMBB || TrueMBB->succ_size() <= 1) 1232 && (!FalseMBB || 
FalseMBB->succ_size() <= 1)); 1233 1234 if (TrueMBB == FalseMBB) 1235 return 0; 1236 1237 MigrateTrue = needMigrateBlock(TrueMBB); 1238 MigrateFalse = needMigrateBlock(FalseMBB); 1239 1240 if (!MigrateTrue && !MigrateFalse) 1241 return 0; 1242 1243 // If we need to migrate either trueBlk and falseBlk, migrate the rest that 1244 // have more than one predecessors. without doing this, its predecessor 1245 // rather than headBlk will have undefined value in initReg. 1246 if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) 1247 MigrateTrue = true; 1248 if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) 1249 MigrateFalse = true; 1250 1251 DEBUG( 1252 dbgs() << "before improveSimpleJumpintoIf: "; 1253 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); 1254 ); 1255 1256 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk 1257 // 1258 // new: headBlk => if () {initReg = 1; org trueBlk branch} else 1259 // {initReg = 0; org falseBlk branch } 1260 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} 1261 // => org landBlk 1262 // if landBlk->pred_size() > 2, put the about if-else inside 1263 // if (initReg !=2) {...} 1264 // 1265 // add initReg = initVal to headBlk 1266 1267 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1268 if (!MigrateTrue || !MigrateFalse) { 1269 // XXX: We have an opportunity here to optimize the "branch into if" case 1270 // here. Branch into if looks like this: 1271 // entry 1272 // / | 1273 // diamond_head branch_from 1274 // / \ | 1275 // diamond_false diamond_true 1276 // \ / 1277 // done 1278 // 1279 // The diamond_head block begins the "if" and the diamond_true block 1280 // is the block being "branched into". 
1281 // 1282 // If MigrateTrue is true, then TrueBB is the block being "branched into" 1283 // and if MigrateFalse is true, then FalseBB is the block being 1284 // "branched into" 1285 // 1286 // Here is the pseudo code for how I think the optimization should work: 1287 // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. 1288 // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. 1289 // 3. Move the branch instruction from diamond_head into its own basic 1290 // block (new_block). 1291 // 4. Add an unconditional branch from diamond_head to new_block 1292 // 5. Replace the branch instruction in branch_from with an unconditional 1293 // branch to new_block. If branch_from has multiple predecessors, then 1294 // we need to replace the True/False block in the branch 1295 // instruction instead of replacing it. 1296 // 6. Change the condition of the branch instruction in new_block from 1297 // COND to (COND || GPR0) 1298 // 1299 // In order insert these MOV instruction, we will need to use the 1300 // RegisterScavenger. Usually liveness stops being tracked during 1301 // the late machine optimization passes, however if we implement 1302 // bool TargetRegisterInfo::requiresRegisterScavenging( 1303 // const MachineFunction &MF) 1304 // and have it return true, liveness will be tracked correctly 1305 // by generic optimization passes. We will also need to make sure that 1306 // all of our target-specific passes that run after regalloc and before 1307 // the CFGStructurizer track liveness and we will need to modify this pass 1308 // to correctly track liveness. 
1309 // 1310 // After the above changes, the new CFG should look like this: 1311 // entry 1312 // / | 1313 // diamond_head branch_from 1314 // \ / 1315 // new_block 1316 // / | 1317 // diamond_false diamond_true 1318 // \ / 1319 // done 1320 // 1321 // Without this optimization, we are forced to duplicate the diamond_true 1322 // block and we will end up with a CFG like this: 1323 // 1324 // entry 1325 // / | 1326 // diamond_head branch_from 1327 // / \ | 1328 // diamond_false diamond_true diamond_true (duplicate) 1329 // \ / | 1330 // done --------------------| 1331 // 1332 // Duplicating diamond_true can be very costly especially if it has a 1333 // lot of instructions. 1334 return 0; 1335 } 1336 1337 int NumNewBlk = 0; 1338 1339 bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); 1340 1341 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" 1342 MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF); 1343 1344 if (LandBlkHasOtherPred) { 1345 report_fatal_error("Extra register needed to handle CFG"); 1346 unsigned CmpResReg = 1347 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1348 report_fatal_error("Extra compare instruction needed to handle CFG"); 1349 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, 1350 CmpResReg, DebugLoc()); 1351 } 1352 1353 // XXX: We are running this after RA, so creating virtual registers will 1354 // cause an assertion failure in the PostRA scheduling pass. 1355 unsigned InitReg = 1356 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1357 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg, 1358 DebugLoc()); 1359 1360 if (MigrateTrue) { 1361 migrateInstruction(TrueMBB, LandBlk, I); 1362 // need to uncondionally insert the assignment to ensure a path from its 1363 // predecessor rather than headBlk has valid value in initReg if 1364 // (initVal != 1). 
1365 report_fatal_error("Extra register needed to handle CFG"); 1366 } 1367 insertInstrBefore(I, AMDGPU::ELSE); 1368 1369 if (MigrateFalse) { 1370 migrateInstruction(FalseMBB, LandBlk, I); 1371 // need to uncondionally insert the assignment to ensure a path from its 1372 // predecessor rather than headBlk has valid value in initReg if 1373 // (initVal != 0) 1374 report_fatal_error("Extra register needed to handle CFG"); 1375 } 1376 1377 if (LandBlkHasOtherPred) { 1378 // add endif 1379 insertInstrBefore(I, AMDGPU::ENDIF); 1380 1381 // put initReg = 2 to other predecessors of landBlk 1382 for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), 1383 PE = LandBlk->pred_end(); PI != PE; ++PI) { 1384 MachineBasicBlock *MBB = *PI; 1385 if (MBB != TrueMBB && MBB != FalseMBB) 1386 report_fatal_error("Extra register needed to handle CFG"); 1387 } 1388 } 1389 DEBUG( 1390 dbgs() << "result from improveSimpleJumpintoIf: "; 1391 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); 1392 ); 1393 1394 // update landBlk 1395 *LandMBBPtr = LandBlk; 1396 1397 return NumNewBlk; 1398 } 1399 1400 void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, 1401 MachineBasicBlock *SrcMBB) { 1402 DEBUG( 1403 dbgs() << "serialPattern BB" << DstMBB->getNumber() 1404 << " <= BB" << SrcMBB->getNumber() << "\n"; 1405 ); 1406 DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); 1407 1408 DstMBB->removeSuccessor(SrcMBB, true); 1409 cloneSuccessorList(DstMBB, SrcMBB); 1410 1411 removeSuccessor(SrcMBB); 1412 MLI->removeBlock(SrcMBB); 1413 retireBlock(SrcMBB); 1414 } 1415 1416 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, 1417 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 1418 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { 1419 assert (TrueMBB); 1420 DEBUG( 1421 dbgs() << "ifPattern BB" << MBB->getNumber(); 1422 dbgs() << "{ "; 1423 if (TrueMBB) { 1424 dbgs() << "BB" << TrueMBB->getNumber(); 1425 } 
1426 dbgs() << " } else "; 1427 dbgs() << "{ "; 1428 if (FalseMBB) { 1429 dbgs() << "BB" << FalseMBB->getNumber(); 1430 } 1431 dbgs() << " }\n "; 1432 dbgs() << "landBlock: "; 1433 if (!LandMBB) { 1434 dbgs() << "NULL"; 1435 } else { 1436 dbgs() << "BB" << LandMBB->getNumber(); 1437 } 1438 dbgs() << "\n"; 1439 ); 1440 1441 int OldOpcode = BranchMI->getOpcode(); 1442 DebugLoc BranchDL = BranchMI->getDebugLoc(); 1443 1444 // transform to 1445 // if cond 1446 // trueBlk 1447 // else 1448 // falseBlk 1449 // endif 1450 // landBlk 1451 1452 MachineBasicBlock::iterator I = BranchMI; 1453 insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), 1454 BranchDL); 1455 1456 if (TrueMBB) { 1457 MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); 1458 MBB->removeSuccessor(TrueMBB, true); 1459 if (LandMBB && TrueMBB->succ_size()!=0) 1460 TrueMBB->removeSuccessor(LandMBB, true); 1461 retireBlock(TrueMBB); 1462 MLI->removeBlock(TrueMBB); 1463 } 1464 1465 if (FalseMBB) { 1466 insertInstrBefore(I, AMDGPU::ELSE); 1467 MBB->splice(I, FalseMBB, FalseMBB->begin(), 1468 FalseMBB->end()); 1469 MBB->removeSuccessor(FalseMBB, true); 1470 if (LandMBB && FalseMBB->succ_size() != 0) 1471 FalseMBB->removeSuccessor(LandMBB, true); 1472 retireBlock(FalseMBB); 1473 MLI->removeBlock(FalseMBB); 1474 } 1475 insertInstrBefore(I, AMDGPU::ENDIF); 1476 1477 BranchMI->eraseFromParent(); 1478 1479 if (LandMBB && TrueMBB && FalseMBB) 1480 MBB->addSuccessor(LandMBB); 1481 1482 } 1483 1484 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, 1485 MachineBasicBlock *LandMBB) { 1486 DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() 1487 << " land = BB" << LandMBB->getNumber() << "\n";); 1488 1489 insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); 1490 insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); 1491 DstBlk->replaceSuccessor(DstBlk, LandMBB); 1492 } 1493 1494 1495 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 1496 
MachineBasicBlock *LandMBB) { 1497 DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber() 1498 << " land = BB" << LandMBB->getNumber() << "\n";); 1499 MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); 1500 assert(BranchMI && isCondBranch(BranchMI)); 1501 DebugLoc DL = BranchMI->getDebugLoc(); 1502 MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); 1503 MachineBasicBlock::iterator I = BranchMI; 1504 if (TrueBranch != LandMBB) 1505 reversePredicateSetter(I, *I->getParent()); 1506 insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL); 1507 insertInstrBefore(I, AMDGPU::BREAK); 1508 insertInstrBefore(I, AMDGPU::ENDIF); 1509 //now branchInst can be erase safely 1510 BranchMI->eraseFromParent(); 1511 //now take care of successors, retire blocks 1512 ExitingMBB->removeSuccessor(LandMBB, true); 1513 } 1514 1515 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, 1516 MachineBasicBlock *ContMBB) { 1517 DEBUG(dbgs() << "settleLoopcontBlock conting = BB" 1518 << ContingMBB->getNumber() 1519 << ", cont = BB" << ContMBB->getNumber() << "\n";); 1520 1521 MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); 1522 if (MI) { 1523 assert(isCondBranch(MI)); 1524 MachineBasicBlock::iterator I = MI; 1525 MachineBasicBlock *TrueBranch = getTrueBranch(MI); 1526 int OldOpcode = MI->getOpcode(); 1527 DebugLoc DL = MI->getDebugLoc(); 1528 1529 bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); 1530 1531 if (!UseContinueLogical) { 1532 int BranchOpcode = 1533 TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : 1534 getBranchZeroOpcode(OldOpcode); 1535 insertCondBranchBefore(I, BranchOpcode, DL); 1536 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1537 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL); 1538 insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL); 1539 } else { 1540 int BranchOpcode = 1541 TrueBranch == ContMBB ? 
getContinueNzeroOpcode(OldOpcode) : 1542 getContinueZeroOpcode(OldOpcode); 1543 insertCondBranchBefore(I, BranchOpcode, DL); 1544 } 1545 1546 MI->eraseFromParent(); 1547 } else { 1548 // if we've arrived here then we've already erased the branch instruction 1549 // travel back up the basic block to see the last reference of our debug 1550 // location we've just inserted that reference here so it should be 1551 // representative insertEnd to ensure phi-moves, if exist, go before the 1552 // continue-instr. 1553 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, 1554 getLastDebugLocInBB(ContingMBB)); 1555 } 1556 } 1557 1558 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 1559 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { 1560 int Cloned = 0; 1561 assert(PreMBB->isSuccessor(SrcMBB)); 1562 while (SrcMBB && SrcMBB != DstMBB) { 1563 assert(SrcMBB->succ_size() == 1); 1564 if (SrcMBB->pred_size() > 1) { 1565 SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); 1566 ++Cloned; 1567 } 1568 1569 PreMBB = SrcMBB; 1570 SrcMBB = *SrcMBB->succ_begin(); 1571 } 1572 1573 return Cloned; 1574 } 1575 1576 MachineBasicBlock * 1577 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, 1578 MachineBasicBlock *PredMBB) { 1579 assert(PredMBB->isSuccessor(MBB) && 1580 "succBlk is not a prececessor of curBlk"); 1581 1582 MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions 1583 replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); 1584 //srcBlk, oldBlk, newBlk 1585 1586 PredMBB->replaceSuccessor(MBB, CloneMBB); 1587 1588 // add all successor to cloneBlk 1589 cloneSuccessorList(CloneMBB, MBB); 1590 1591 numClonedInstr += MBB->size(); 1592 1593 DEBUG( 1594 dbgs() << "Cloned block: " << "BB" 1595 << MBB->getNumber() << "size " << MBB->size() << "\n"; 1596 ); 1597 1598 SHOWNEWBLK(CloneMBB, "result of Cloned block: "); 1599 1600 return CloneMBB; 1601 } 1602 1603 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, 1604 
MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { 1605 MachineBasicBlock::iterator SpliceEnd; 1606 //look for the input branchinstr, not the AMDGPU branchinstr 1607 MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); 1608 if (!BranchMI) { 1609 DEBUG( 1610 dbgs() << "migrateInstruction don't see branch instr\n" ; 1611 ); 1612 SpliceEnd = SrcMBB->end(); 1613 } else { 1614 DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI); 1615 SpliceEnd = BranchMI; 1616 } 1617 DEBUG( 1618 dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size() 1619 << "srcSize = " << SrcMBB->size() << "\n"; 1620 ); 1621 1622 //splice insert before insertPos 1623 DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); 1624 1625 DEBUG( 1626 dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size() 1627 << "srcSize = " << SrcMBB->size() << '\n'; 1628 ); 1629 } 1630 1631 MachineBasicBlock * 1632 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { 1633 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1634 MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); 1635 1636 if (!LoopHeader || !LoopLatch) 1637 return nullptr; 1638 MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); 1639 // Is LoopRep an infinite loop ? 
1640 if (!BranchMI || !isUncondBranch(BranchMI)) 1641 return nullptr; 1642 1643 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1644 FuncRep->push_back(DummyExitBlk); //insert to function 1645 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); 1646 DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); 1647 LLVMContext &Ctx = LoopHeader->getParent()->getFunction()->getContext(); 1648 Ctx.emitError("Extra register needed to handle CFG"); 1649 return nullptr; 1650 } 1651 1652 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { 1653 MachineInstr *BranchMI; 1654 1655 // I saw two unconditional branch in one basic block in example 1656 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 1657 while ((BranchMI = getLoopendBlockBranchInstr(MBB)) 1658 && isUncondBranch(BranchMI)) { 1659 DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); 1660 BranchMI->eraseFromParent(); 1661 } 1662 } 1663 1664 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch( 1665 MachineBasicBlock *MBB) { 1666 if (MBB->succ_size() != 2) 1667 return; 1668 MachineBasicBlock *MBB1 = *MBB->succ_begin(); 1669 MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin()); 1670 if (MBB1 != MBB2) 1671 return; 1672 1673 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 1674 assert(BranchMI && isCondBranch(BranchMI)); 1675 DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); 1676 BranchMI->eraseFromParent(); 1677 SHOWNEWBLK(MBB1, "Removing redundant successor"); 1678 MBB->removeSuccessor(MBB1, true); 1679 } 1680 1681 void AMDGPUCFGStructurizer::addDummyExitBlock( 1682 SmallVectorImpl<MachineBasicBlock*> &RetMBB) { 1683 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1684 FuncRep->push_back(DummyExitBlk); //insert to function 1685 insertInstrEnd(DummyExitBlk, AMDGPU::RETURN); 1686 1687 for (SmallVectorImpl<MachineBasicBlock *>::iterator It = 
RetMBB.begin(), 1688 E = RetMBB.end(); It != E; ++It) { 1689 MachineBasicBlock *MBB = *It; 1690 MachineInstr *MI = getReturnInstr(MBB); 1691 if (MI) 1692 MI->eraseFromParent(); 1693 MBB->addSuccessor(DummyExitBlk); 1694 DEBUG( 1695 dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() 1696 << " successors\n"; 1697 ); 1698 } 1699 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); 1700 } 1701 1702 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { 1703 while (MBB->succ_size()) 1704 MBB->removeSuccessor(*MBB->succ_begin()); 1705 } 1706 1707 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, 1708 int SccNum) { 1709 BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; 1710 if (!srcBlkInfo) 1711 srcBlkInfo = new BlockInformation(); 1712 srcBlkInfo->SccNum = SccNum; 1713 } 1714 1715 void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { 1716 DEBUG( 1717 dbgs() << "Retiring BB" << MBB->getNumber() << "\n"; 1718 ); 1719 1720 BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; 1721 1722 if (!SrcBlkInfo) 1723 SrcBlkInfo = new BlockInformation(); 1724 1725 SrcBlkInfo->IsRetired = true; 1726 assert(MBB->succ_size() == 0 && MBB->pred_size() == 0 1727 && "can't retire block yet"); 1728 } 1729 1730 char AMDGPUCFGStructurizer::ID = 0; 1731 1732 } // end anonymous namespace 1733 1734 1735 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", 1736 "AMDGPU CFG Structurizer", false, false) 1737 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 1738 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) 1739 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 1740 INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer", 1741 "AMDGPU CFG Structurizer", false, false) 1742 1743 FunctionPass *llvm::createAMDGPUCFGStructurizerPass() { 1744 return new AMDGPUCFGStructurizer(); 1745 } 1746