1 //===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 10 #include "AMDGPU.h" 11 #include "AMDGPUSubtarget.h" 12 #include "R600InstrInfo.h" 13 #include "R600RegisterInfo.h" 14 #include "llvm/ADT/DepthFirstIterator.h" 15 #include "llvm/ADT/SCCIterator.h" 16 #include "llvm/ADT/SmallPtrSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/ADT/StringRef.h" 20 #include "llvm/CodeGen/MachineBasicBlock.h" 21 #include "llvm/CodeGen/MachineDominators.h" 22 #include "llvm/CodeGen/MachineFunction.h" 23 #include "llvm/CodeGen/MachineFunctionPass.h" 24 #include "llvm/CodeGen/MachineInstr.h" 25 #include "llvm/CodeGen/MachineInstrBuilder.h" 26 #include "llvm/CodeGen/MachineJumpTableInfo.h" 27 #include "llvm/CodeGen/MachineLoopInfo.h" 28 #include "llvm/CodeGen/MachineOperand.h" 29 #include "llvm/CodeGen/MachinePostDominators.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 #include "llvm/CodeGen/MachineValueType.h" 32 #include "llvm/IR/DebugLoc.h" 33 #include "llvm/IR/LLVMContext.h" 34 #include "llvm/Pass.h" 35 #include "llvm/Support/Debug.h" 36 #include "llvm/Support/ErrorHandling.h" 37 #include "llvm/Support/raw_ostream.h" 38 #include <cassert> 39 #include <cstddef> 40 #include <deque> 41 #include <iterator> 42 #include <map> 43 #include <utility> 44 #include <vector> 45 46 using namespace llvm; 47 48 #define DEBUG_TYPE "structcfg" 49 50 #define DEFAULT_VEC_SLOTS 8 51 52 // TODO: move-begin. 53 54 //===----------------------------------------------------------------------===// 55 // 56 // Statistics for CFGStructurizer. 57 // 58 //===----------------------------------------------------------------------===// 59 60 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " 61 "matched"); 62 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " 63 "matched"); 64 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); 65 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); 66 67 namespace llvm { 68 69 void initializeAMDGPUCFGStructurizerPass(PassRegistry &); 70 71 } // end namespace llvm 72 73 namespace { 74 75 //===----------------------------------------------------------------------===// 76 // 77 // Miscellaneous utility for CFGStructurizer. 78 // 79 //===----------------------------------------------------------------------===// 80 81 #define SHOWNEWINSTR(i) \ 82 DEBUG(dbgs() << "New instr: " << *i << "\n"); 83 84 #define SHOWNEWBLK(b, msg) \ 85 DEBUG( \ 86 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 87 dbgs() << "\n"; \ 88 ); 89 90 #define SHOWBLK_DETAIL(b, msg) \ 91 DEBUG( \ 92 if (b) { \ 93 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 94 b->print(dbgs()); \ 95 dbgs() << "\n"; \ 96 } \ 97 ); 98 99 #define INVALIDSCCNUM -1 100 101 //===----------------------------------------------------------------------===// 102 // 103 // supporting data structure for CFGStructurizer 104 // 105 //===----------------------------------------------------------------------===// 106 107 class BlockInformation { 108 public: 109 bool IsRetired = false; 110 int SccNum = INVALIDSCCNUM; 111 112 BlockInformation() = default; 113 }; 114 115 //===----------------------------------------------------------------------===// 116 // 117 // CFGStructurizer 118 // 119 //===----------------------------------------------------------------------===// 120 121 class AMDGPUCFGStructurizer : public MachineFunctionPass { 122 public: 123 using MBBVector = SmallVector<MachineBasicBlock *, 32>; 124 using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>; 125 using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>; 126 127 enum PathToKind { 128 Not_SinglePath = 0, 129 SinglePath_InPath = 1, 130 SinglePath_NotInPath = 2 131 }; 132 133 static char ID; 134 135 AMDGPUCFGStructurizer() : MachineFunctionPass(ID) { 136 initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry()); 137 } 138 139 StringRef getPassName() const override { 140 return "AMDGPU Control Flow Graph structurizer Pass"; 141 } 142 143 void getAnalysisUsage(AnalysisUsage &AU) const override { 144 AU.addRequired<MachineDominatorTree>(); 145 AU.addRequired<MachinePostDominatorTree>(); 146 AU.addRequired<MachineLoopInfo>(); 147 MachineFunctionPass::getAnalysisUsage(AU); 148 } 149 150 /// Perform the CFG structurization 151 bool run(); 152 153 /// Perform the CFG preparation 154 /// This step will remove every unconditionnal/dead jump instructions and make 155 /// sure all loops have an exit block 156 bool prepare(); 157 158 bool runOnMachineFunction(MachineFunction &MF) override { 159 TII = MF.getSubtarget<R600Subtarget>().getInstrInfo(); 160 TRI = &TII->getRegisterInfo(); 161 DEBUG(MF.dump();); 162 OrderedBlks.clear(); 163 Visited.clear(); 164 FuncRep = &MF; 165 MLI = &getAnalysis<MachineLoopInfo>(); 166 DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); 167 MDT = &getAnalysis<MachineDominatorTree>(); 168 DEBUG(MDT->print(dbgs(), (const Module*)nullptr);); 169 PDT = &getAnalysis<MachinePostDominatorTree>(); 170 DEBUG(PDT->print(dbgs());); 171 prepare(); 172 run(); 173 DEBUG(MF.dump();); 174 return true; 175 } 176 177 protected: 178 MachineDominatorTree *MDT; 179 MachinePostDominatorTree *PDT; 180 MachineLoopInfo *MLI; 181 const R600InstrInfo *TII = nullptr; 182 const R600RegisterInfo *TRI = nullptr; 183 184 // PRINT FUNCTIONS 185 /// Print the ordered Blocks. 186 void printOrderedBlocks() const { 187 size_t i = 0; 188 for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), 189 iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { 190 dbgs() << "BB" << (*iterBlk)->getNumber(); 191 dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; 192 if (i != 0 && i % 10 == 0) { 193 dbgs() << "\n"; 194 } else { 195 dbgs() << " "; 196 } 197 } 198 } 199 200 static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { 201 for (MachineLoop::iterator iter = LoopInfo.begin(), 202 iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) { 203 (*iter)->print(dbgs(), 0); 204 } 205 } 206 207 // UTILITY FUNCTIONS 208 int getSCCNum(MachineBasicBlock *MBB) const; 209 MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; 210 bool hasBackEdge(MachineBasicBlock *MBB) const; 211 bool isRetiredBlock(MachineBasicBlock *MBB) const; 212 bool isActiveLoophead(MachineBasicBlock *MBB) const; 213 PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 214 bool AllowSideEntry = true) const; 215 int countActiveBlock(MBBVector::const_iterator It, 216 MBBVector::const_iterator E) const; 217 bool needMigrateBlock(MachineBasicBlock *MBB) const; 218 219 // Utility Functions 220 void reversePredicateSetter(MachineBasicBlock::iterator I, 221 MachineBasicBlock &MBB); 222 /// Compute the reversed DFS post order of Blocks 223 void orderBlocks(MachineFunction *MF); 224 225 // Function originally from CFGStructTraits 226 void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, 227 const DebugLoc &DL = DebugLoc()); 228 MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, 229 const DebugLoc &DL = DebugLoc()); 230 MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); 231 void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, 232 const DebugLoc &DL); 233 void insertCondBranchBefore(MachineBasicBlock *MBB, 234 MachineBasicBlock::iterator I, int NewOpcode, 235 int RegNum, const DebugLoc &DL); 236 237 static int getBranchNzeroOpcode(int OldOpcode); 238 static int getBranchZeroOpcode(int OldOpcode); 239 static int getContinueNzeroOpcode(int OldOpcode); 240 static int getContinueZeroOpcode(int OldOpcode); 241 static MachineBasicBlock *getTrueBranch(MachineInstr *MI); 242 static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); 243 static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, 244 MachineInstr *MI); 245 static bool isCondBranch(MachineInstr *MI); 246 static bool isUncondBranch(MachineInstr *MI); 247 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); 248 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); 249 250 /// The correct naming for this is getPossibleLoopendBlockBranchInstr. 251 /// 252 /// BB with backward-edge could have move instructions after the branch 253 /// instruction. Such move instruction "belong to" the loop backward-edge. 254 MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); 255 256 static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); 257 static bool isReturnBlock(MachineBasicBlock *MBB); 258 static void cloneSuccessorList(MachineBasicBlock *DstMBB, 259 MachineBasicBlock *SrcMBB); 260 static MachineBasicBlock *clone(MachineBasicBlock *MBB); 261 262 /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose 263 /// because the AMDGPU instruction is not recognized as terminator fix this 264 /// and retire this routine 265 void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, 266 MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); 267 268 static void wrapup(MachineBasicBlock *MBB); 269 270 int patternMatch(MachineBasicBlock *MBB); 271 int patternMatchGroup(MachineBasicBlock *MBB); 272 int serialPatternMatch(MachineBasicBlock *MBB); 273 int ifPatternMatch(MachineBasicBlock *MBB); 274 int loopendPatternMatch(); 275 int mergeLoop(MachineLoop *LoopRep); 276 277 /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in 278 /// the same loop with LoopLandInfo without explicitly keeping track of 279 /// loopContBlks and loopBreakBlks, this is a method to get the information. 280 bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB, 281 MachineBasicBlock *Src2MBB); 282 int handleJumpintoIf(MachineBasicBlock *HeadMBB, 283 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 284 int handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 285 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 286 int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 287 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 288 MachineBasicBlock **LandMBBPtr); 289 void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 290 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 291 MachineBasicBlock *LandMBB, bool Detail = false); 292 int cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 293 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB); 294 void mergeSerialBlock(MachineBasicBlock *DstMBB, 295 MachineBasicBlock *SrcMBB); 296 297 void mergeIfthenelseBlock(MachineInstr *BranchMI, 298 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 299 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB); 300 void mergeLooplandBlock(MachineBasicBlock *DstMBB, 301 MachineBasicBlock *LandMBB); 302 void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 303 MachineBasicBlock *LandMBB); 304 void settleLoopcontBlock(MachineBasicBlock *ContingMBB, 305 MachineBasicBlock *ContMBB); 306 307 /// normalizeInfiniteLoopExit change 308 /// B1: 309 /// uncond_br LoopHeader 310 /// 311 /// to 312 /// B1: 313 /// cond_br 1 LoopHeader dummyExit 314 /// and return the newly added dummy exit block 315 MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); 316 void removeUnconditionalBranch(MachineBasicBlock *MBB); 317 318 /// Remove duplicate branches instructions in a block. 319 /// For instance 320 /// B0: 321 /// cond_br X B1 B2 322 /// cond_br X B1 B2 323 /// is transformed to 324 /// B0: 325 /// cond_br X B1 B2 326 void removeRedundantConditionalBranch(MachineBasicBlock *MBB); 327 328 void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB); 329 void removeSuccessor(MachineBasicBlock *MBB); 330 MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, 331 MachineBasicBlock *PredMBB); 332 void migrateInstruction(MachineBasicBlock *SrcMBB, 333 MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); 334 void recordSccnum(MachineBasicBlock *MBB, int SCCNum); 335 void retireBlock(MachineBasicBlock *MBB); 336 337 private: 338 MBBInfoMap BlockInfoMap; 339 LoopLandInfoMap LLInfoMap; 340 std::map<MachineLoop *, bool> Visited; 341 MachineFunction *FuncRep; 342 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks; 343 }; 344 345 } // end anonymous namespace 346 347 char AMDGPUCFGStructurizer::ID = 0; 348 349 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { 350 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 351 if (It == BlockInfoMap.end()) 352 return INVALIDSCCNUM; 353 return (*It).second->SccNum; 354 } 355 356 MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) 357 const { 358 LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); 359 if (It == LLInfoMap.end()) 360 return nullptr; 361 return (*It).second; 362 } 363 364 bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { 365 MachineLoop *LoopRep = MLI->getLoopFor(MBB); 366 if (!LoopRep) 367 return false; 368 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 369 return MBB->isSuccessor(LoopHeader); 370 } 371 372 bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { 373 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 374 if (It == BlockInfoMap.end()) 375 return false; 376 return (*It).second->IsRetired; 377 } 378 379 bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { 380 MachineLoop *LoopRep = MLI->getLoopFor(MBB); 381 while (LoopRep && LoopRep->getHeader() == MBB) { 382 MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); 383 if(!LoopLand) 384 return true; 385 if (!isRetiredBlock(LoopLand)) 386 return true; 387 LoopRep = LoopRep->getParentLoop(); 388 } 389 return false; 390 } 391 392 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo( 393 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 394 bool AllowSideEntry) const { 395 assert(DstMBB); 396 if (SrcMBB == DstMBB) 397 return SinglePath_InPath; 398 while (SrcMBB && SrcMBB->succ_size() == 1) { 399 SrcMBB = *SrcMBB->succ_begin(); 400 if (SrcMBB == DstMBB) 401 return SinglePath_InPath; 402 if (!AllowSideEntry && SrcMBB->pred_size() > 1) 403 return Not_SinglePath; 404 } 405 if (SrcMBB && SrcMBB->succ_size()==0) 406 return SinglePath_NotInPath; 407 return Not_SinglePath; 408 } 409 410 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, 411 MBBVector::const_iterator E) const { 412 int Count = 0; 413 while (It != E) { 414 if (!isRetiredBlock(*It)) 415 ++Count; 416 ++It; 417 } 418 return Count; 419 } 420 421 bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { 422 unsigned BlockSizeThreshold = 30; 423 unsigned CloneInstrThreshold = 100; 424 bool MultiplePreds = MBB && (MBB->pred_size() > 1); 425 426 if(!MultiplePreds) 427 return false; 428 unsigned BlkSize = MBB->size(); 429 return ((BlkSize > BlockSizeThreshold) && 430 (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); 431 } 432 433 void AMDGPUCFGStructurizer::reversePredicateSetter( 434 MachineBasicBlock::iterator I, MachineBasicBlock &MBB) { 435 assert(I.isValid() && "Expected valid iterator"); 436 for (;; --I) { 437 if (I == MBB.end()) 438 continue; 439 if (I->getOpcode() == AMDGPU::PRED_X) { 440 switch (I->getOperand(2).getImm()) { 441 case AMDGPU::PRED_SETE_INT: 442 I->getOperand(2).setImm(AMDGPU::PRED_SETNE_INT); 443 return; 444 case AMDGPU::PRED_SETNE_INT: 445 I->getOperand(2).setImm(AMDGPU::PRED_SETE_INT); 446 return; 447 case AMDGPU::PRED_SETE: 448 I->getOperand(2).setImm(AMDGPU::PRED_SETNE); 449 return; 450 case AMDGPU::PRED_SETNE: 451 I->getOperand(2).setImm(AMDGPU::PRED_SETE); 452 return; 453 default: 454 llvm_unreachable("PRED_X Opcode invalid!"); 455 } 456 } 457 } 458 } 459 460 void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, 461 int NewOpcode, const DebugLoc &DL) { 462 MachineInstr *MI = 463 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); 464 MBB->push_back(MI); 465 //assume the instruction doesn't take any reg operand ... 466 SHOWNEWINSTR(MI); 467 } 468 469 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, 470 int NewOpcode, 471 const DebugLoc &DL) { 472 MachineInstr *MI = 473 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); 474 if (MBB->begin() != MBB->end()) 475 MBB->insert(MBB->begin(), MI); 476 else 477 MBB->push_back(MI); 478 SHOWNEWINSTR(MI); 479 return MI; 480 } 481 482 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore( 483 MachineBasicBlock::iterator I, int NewOpcode) { 484 MachineInstr *OldMI = &(*I); 485 MachineBasicBlock *MBB = OldMI->getParent(); 486 MachineInstr *NewMBB = 487 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); 488 MBB->insert(I, NewMBB); 489 //assume the instruction doesn't take any reg operand ... 490 SHOWNEWINSTR(NewMBB); 491 return NewMBB; 492 } 493 494 void AMDGPUCFGStructurizer::insertCondBranchBefore( 495 MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) { 496 MachineInstr *OldMI = &(*I); 497 MachineBasicBlock *MBB = OldMI->getParent(); 498 MachineFunction *MF = MBB->getParent(); 499 MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 500 MBB->insert(I, NewMI); 501 MachineInstrBuilder MIB(*MF, NewMI); 502 MIB.addReg(OldMI->getOperand(1).getReg(), false); 503 SHOWNEWINSTR(NewMI); 504 //erase later oldInstr->eraseFromParent(); 505 } 506 507 void AMDGPUCFGStructurizer::insertCondBranchBefore( 508 MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode, 509 int RegNum, const DebugLoc &DL) { 510 MachineFunction *MF = blk->getParent(); 511 MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 512 //insert before 513 blk->insert(I, NewInstr); 514 MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); 515 SHOWNEWINSTR(NewInstr); 516 } 517 518 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { 519 switch(OldOpcode) { 520 case AMDGPU::JUMP_COND: 521 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; 522 case AMDGPU::BRANCH_COND_i32: 523 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; 524 default: llvm_unreachable("internal error"); 525 } 526 return -1; 527 } 528 529 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { 530 switch(OldOpcode) { 531 case AMDGPU::JUMP_COND: 532 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; 533 case AMDGPU::BRANCH_COND_i32: 534 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; 535 default: llvm_unreachable("internal error"); 536 } 537 return -1; 538 } 539 540 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { 541 switch(OldOpcode) { 542 case AMDGPU::JUMP_COND: 543 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; 544 default: llvm_unreachable("internal error"); 545 } 546 return -1; 547 } 548 549 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { 550 switch(OldOpcode) { 551 case AMDGPU::JUMP_COND: 552 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; 553 default: llvm_unreachable("internal error"); 554 } 555 return -1; 556 } 557 558 MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) { 559 return MI->getOperand(0).getMBB(); 560 } 561 562 void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI, 563 MachineBasicBlock *MBB) { 564 MI->getOperand(0).setMBB(MBB); 565 } 566 567 MachineBasicBlock * 568 AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, 569 MachineInstr *MI) { 570 assert(MBB->succ_size() == 2); 571 MachineBasicBlock *TrueBranch = getTrueBranch(MI); 572 MachineBasicBlock::succ_iterator It = MBB->succ_begin(); 573 MachineBasicBlock::succ_iterator Next = It; 574 ++Next; 575 return (*It == TrueBranch) ? *Next : *It; 576 } 577 578 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) { 579 switch (MI->getOpcode()) { 580 case AMDGPU::JUMP_COND: 581 case AMDGPU::BRANCH_COND_i32: 582 case AMDGPU::BRANCH_COND_f32: return true; 583 default: 584 return false; 585 } 586 return false; 587 } 588 589 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) { 590 switch (MI->getOpcode()) { 591 case AMDGPU::JUMP: 592 case AMDGPU::BRANCH: 593 return true; 594 default: 595 return false; 596 } 597 return false; 598 } 599 600 DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { 601 //get DebugLoc from the first MachineBasicBlock instruction with debug info 602 DebugLoc DL; 603 for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); 604 ++It) { 605 MachineInstr *instr = &(*It); 606 if (instr->getDebugLoc()) 607 DL = instr->getDebugLoc(); 608 } 609 return DL; 610 } 611 612 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr( 613 MachineBasicBlock *MBB) { 614 MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 615 MachineInstr *MI = &*It; 616 if (MI && (isCondBranch(MI) || isUncondBranch(MI))) 617 return MI; 618 return nullptr; 619 } 620 621 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr( 622 MachineBasicBlock *MBB) { 623 for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); 624 It != E; ++It) { 625 // FIXME: Simplify 626 MachineInstr *MI = &*It; 627 if (MI) { 628 if (isCondBranch(MI) || isUncondBranch(MI)) 629 return MI; 630 else if (!TII->isMov(MI->getOpcode())) 631 break; 632 } 633 } 634 return nullptr; 635 } 636 637 MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { 638 MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 639 if (It != MBB->rend()) { 640 MachineInstr *instr = &(*It); 641 if (instr->getOpcode() == AMDGPU::RETURN) 642 return instr; 643 } 644 return nullptr; 645 } 646 647 bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { 648 MachineInstr *MI = getReturnInstr(MBB); 649 bool IsReturn = (MBB->succ_size() == 0); 650 if (MI) 651 assert(IsReturn); 652 else if (IsReturn) 653 DEBUG( 654 dbgs() << "BB" << MBB->getNumber() 655 <<" is return block without RETURN instr\n";); 656 return IsReturn; 657 } 658 659 void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, 660 MachineBasicBlock *SrcMBB) { 661 for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(), 662 iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It) 663 DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of 664 } 665 666 MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) { 667 MachineFunction *Func = MBB->getParent(); 668 MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); 669 Func->push_back(NewMBB); //insert to function 670 for (const MachineInstr &It : *MBB) 671 NewMBB->push_back(Func->CloneMachineInstr(&It)); 672 return NewMBB; 673 } 674 675 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith( 676 MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, 677 MachineBasicBlock *NewBlk) { 678 MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB); 679 if (BranchMI && isCondBranch(BranchMI) && 680 getTrueBranch(BranchMI) == OldMBB) 681 setTrueBranch(BranchMI, NewBlk); 682 } 683 684 void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) { 685 assert((!MBB->getParent()->getJumpTableInfo() 686 || MBB->getParent()->getJumpTableInfo()->isEmpty()) 687 && "found a jump table"); 688 689 //collect continue right before endloop 690 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr; 691 MachineBasicBlock::iterator Pre = MBB->begin(); 692 MachineBasicBlock::iterator E = MBB->end(); 693 MachineBasicBlock::iterator It = Pre; 694 while (It != E) { 695 if (Pre->getOpcode() == AMDGPU::CONTINUE 696 && It->getOpcode() == AMDGPU::ENDLOOP) 697 ContInstr.push_back(&*Pre); 698 Pre = It; 699 ++It; 700 } 701 702 //delete continue right before endloop 703 for (unsigned i = 0; i < ContInstr.size(); ++i) 704 ContInstr[i]->eraseFromParent(); 705 706 // TODO to fix up jump table so later phase won't be confused. if 707 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but 708 // there isn't such an interface yet. alternatively, replace all the other 709 // blocks in the jump table with the entryBlk //} 710 } 711 712 bool AMDGPUCFGStructurizer::prepare() { 713 bool Changed = false; 714 715 //FIXME: if not reducible flow graph, make it so ??? 716 717 DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";); 718 719 orderBlocks(FuncRep); 720 721 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks; 722 723 // Add an ExitBlk to loop that don't have one 724 for (MachineLoopInfo::iterator It = MLI->begin(), 725 E = MLI->end(); It != E; ++It) { 726 MachineLoop *LoopRep = (*It); 727 MBBVector ExitingMBBs; 728 LoopRep->getExitingBlocks(ExitingMBBs); 729 730 if (ExitingMBBs.size() == 0) { 731 MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep); 732 if (DummyExitBlk) 733 RetBlks.push_back(DummyExitBlk); 734 } 735 } 736 737 // Remove unconditional branch instr. 738 // Add dummy exit block iff there are multiple returns. 739 for (SmallVectorImpl<MachineBasicBlock *>::const_iterator 740 It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) { 741 MachineBasicBlock *MBB = *It; 742 removeUnconditionalBranch(MBB); 743 removeRedundantConditionalBranch(MBB); 744 if (isReturnBlock(MBB)) { 745 RetBlks.push_back(MBB); 746 } 747 assert(MBB->succ_size() <= 2); 748 } 749 750 if (RetBlks.size() >= 2) { 751 addDummyExitBlock(RetBlks); 752 Changed = true; 753 } 754 755 return Changed; 756 } 757 758 bool AMDGPUCFGStructurizer::run() { 759 //Assume reducible CFG... 760 DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n"); 761 762 #ifdef STRESSTEST 763 //Use the worse block ordering to test the algorithm. 764 ReverseVector(orderedBlks); 765 #endif 766 767 DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); 768 int NumIter = 0; 769 bool Finish = false; 770 MachineBasicBlock *MBB; 771 bool MakeProgress = false; 772 int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(), 773 OrderedBlks.end()); 774 775 do { 776 ++NumIter; 777 DEBUG( 778 dbgs() << "numIter = " << NumIter 779 << ", numRemaintedBlk = " << NumRemainedBlk << "\n"; 780 ); 781 782 SmallVectorImpl<MachineBasicBlock *>::const_iterator It = 783 OrderedBlks.begin(); 784 SmallVectorImpl<MachineBasicBlock *>::const_iterator E = 785 OrderedBlks.end(); 786 787 SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter = 788 It; 789 MachineBasicBlock *SccBeginMBB = nullptr; 790 int SccNumBlk = 0; // The number of active blocks, init to a 791 // maximum possible number. 792 int SccNumIter; // Number of iteration in this SCC. 793 794 while (It != E) { 795 MBB = *It; 796 797 if (!SccBeginMBB) { 798 SccBeginIter = It; 799 SccBeginMBB = MBB; 800 SccNumIter = 0; 801 SccNumBlk = NumRemainedBlk; // Init to maximum possible number. 802 DEBUG( 803 dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); 804 dbgs() << "\n"; 805 ); 806 } 807 808 if (!isRetiredBlock(MBB)) 809 patternMatch(MBB); 810 811 ++It; 812 813 bool ContNextScc = true; 814 if (It == E 815 || getSCCNum(SccBeginMBB) != getSCCNum(*It)) { 816 // Just finish one scc. 817 ++SccNumIter; 818 int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); 819 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { 820 DEBUG( 821 dbgs() << "Can't reduce SCC " << getSCCNum(MBB) 822 << ", sccNumIter = " << SccNumIter; 823 dbgs() << "doesn't make any progress\n"; 824 ); 825 ContNextScc = true; 826 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { 827 SccNumBlk = sccRemainedNumBlk; 828 It = SccBeginIter; 829 ContNextScc = false; 830 DEBUG( 831 dbgs() << "repeat processing SCC" << getSCCNum(MBB) 832 << "sccNumIter = " << SccNumIter << '\n'; 833 ); 834 } else { 835 // Finish the current scc. 836 ContNextScc = true; 837 } 838 } else { 839 // Continue on next component in the current scc. 840 ContNextScc = false; 841 } 842 843 if (ContNextScc) 844 SccBeginMBB = nullptr; 845 } //while, "one iteration" over the function. 846 847 MachineBasicBlock *EntryMBB = 848 *GraphTraits<MachineFunction *>::nodes_begin(FuncRep); 849 if (EntryMBB->succ_size() == 0) { 850 Finish = true; 851 DEBUG( 852 dbgs() << "Reduce to one block\n"; 853 ); 854 } else { 855 int NewnumRemainedBlk 856 = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); 857 // consider cloned blocks ?? 858 if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { 859 MakeProgress = true; 860 NumRemainedBlk = NewnumRemainedBlk; 861 } else { 862 MakeProgress = false; 863 DEBUG( 864 dbgs() << "No progress\n"; 865 ); 866 } 867 } 868 } while (!Finish && MakeProgress); 869 870 // Misc wrap up to maintain the consistency of the Function representation. 871 wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep)); 872 873 // Detach retired Block, release memory. 874 for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end(); 875 It != E; ++It) { 876 if ((*It).second && (*It).second->IsRetired) { 877 assert(((*It).first)->getNumber() != -1); 878 DEBUG( 879 dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n"; 880 ); 881 (*It).first->eraseFromParent(); //Remove from the parent Function. 882 } 883 delete (*It).second; 884 } 885 BlockInfoMap.clear(); 886 LLInfoMap.clear(); 887 888 if (!Finish) { 889 DEBUG(FuncRep->viewCFG()); 890 report_fatal_error("IRREDUCIBLE_CFG"); 891 } 892 893 return true; 894 } 895 896 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) { 897 int SccNum = 0; 898 MachineBasicBlock *MBB; 899 for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd(); 900 ++It, ++SccNum) { 901 const std::vector<MachineBasicBlock *> &SccNext = *It; 902 for (std::vector<MachineBasicBlock *>::const_iterator 903 blockIter = SccNext.begin(), blockEnd = SccNext.end(); 904 blockIter != blockEnd; ++blockIter) { 905 MBB = *blockIter; 906 OrderedBlks.push_back(MBB); 907 recordSccnum(MBB, SccNum); 908 } 909 } 910 911 // walk through all the block in func to check for unreachable 912 for (auto *MBB : nodes(MF)) { 913 SccNum = getSCCNum(MBB); 914 if (SccNum == INVALIDSCCNUM) 915 dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"; 916 } 917 } 918 919 int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { 920 int NumMatch = 0; 921 int CurMatch; 922 923 DEBUG( 924 dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n"; 925 ); 926 927 while ((CurMatch = patternMatchGroup(MBB)) > 0) 928 NumMatch += CurMatch; 929 930 DEBUG( 931 dbgs() << "End patternMatch BB" << MBB->getNumber() 932 << ", numMatch = " << NumMatch << "\n"; 933 ); 934 935 return NumMatch; 936 } 937 938 int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { 939 int NumMatch = 0; 940 NumMatch += loopendPatternMatch(); 941 NumMatch += serialPatternMatch(MBB); 942 NumMatch += ifPatternMatch(MBB); 943 return NumMatch; 944 } 945 946 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { 947 if (MBB->succ_size() != 1) 948 return 0; 949 950 MachineBasicBlock *childBlk = *MBB->succ_begin(); 951 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) 952 return 0; 953 954 mergeSerialBlock(MBB, childBlk); 955 ++numSerialPatternMatch; 956 return 1; 957 } 958 959 int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { 960 //two edges 961 if (MBB->succ_size() != 2) 962 return 0; 963 if (hasBackEdge(MBB)) 964 return 0; 965 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 966 if (!BranchMI) 967 return 0; 968 969 assert(isCondBranch(BranchMI)); 970 int NumMatch = 0; 971 972 MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI); 973 NumMatch += serialPatternMatch(TrueMBB); 974 NumMatch += ifPatternMatch(TrueMBB); 975 MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI); 976 NumMatch += serialPatternMatch(FalseMBB); 977 NumMatch += ifPatternMatch(FalseMBB); 978 MachineBasicBlock *LandBlk; 979 int Cloned = 0; 980 981 assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); 982 // TODO: Simplify 983 if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 984 && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { 985 // Diamond pattern 986 LandBlk = *TrueMBB->succ_begin(); 987 } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { 988 // Triangle pattern, false is empty 989 LandBlk = FalseMBB; 990 FalseMBB = nullptr; 991 } else if (FalseMBB->succ_size() == 1 992 && *FalseMBB->succ_begin() == TrueMBB) { 993 // Triangle pattern, true is empty 994 // We reverse the predicate to make a triangle, empty false pattern; 995 std::swap(TrueMBB, FalseMBB); 996 reversePredicateSetter(MBB->end(), *MBB); 997 LandBlk = FalseMBB; 998 FalseMBB = nullptr; 999 } else if (FalseMBB->succ_size() == 1 1000 && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { 1001 LandBlk = *FalseMBB->succ_begin(); 1002 } else if (TrueMBB->succ_size() == 1 1003 && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) { 1004 LandBlk = *TrueMBB->succ_begin(); 1005 } else { 1006 return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB); 1007 } 1008 1009 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the 1010 // new BB created for landBlk==NULL may introduce new challenge to the 1011 // reduction process. 1012 if (LandBlk && 1013 ((TrueMBB && TrueMBB->pred_size() > 1) 1014 || (FalseMBB && FalseMBB->pred_size() > 1))) { 1015 Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk); 1016 } 1017 1018 if (TrueMBB && TrueMBB->pred_size() > 1) { 1019 TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB); 1020 ++Cloned; 1021 } 1022 1023 if (FalseMBB && FalseMBB->pred_size() > 1) { 1024 FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB); 1025 ++Cloned; 1026 } 1027 1028 mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk); 1029 1030 ++numIfPatternMatch; 1031 1032 numClonedBlock += Cloned; 1033 1034 return 1 + Cloned + NumMatch; 1035 } 1036 1037 int AMDGPUCFGStructurizer::loopendPatternMatch() { 1038 std::deque<MachineLoop *> NestedLoops; 1039 for (auto &It: *MLI) 1040 for (MachineLoop *ML : depth_first(It)) 1041 NestedLoops.push_front(ML); 1042 1043 if (NestedLoops.empty()) 1044 return 0; 1045 1046 // Process nested loop outside->inside (we did push_front), 1047 // so "continue" to a outside loop won't be mistaken as "break" 1048 // of the current loop. 1049 int Num = 0; 1050 for (MachineLoop *ExaminedLoop : NestedLoops) { 1051 if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) 1052 continue; 1053 DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); 1054 int NumBreak = mergeLoop(ExaminedLoop); 1055 if (NumBreak == -1) 1056 break; 1057 Num += NumBreak; 1058 } 1059 return Num; 1060 } 1061 1062 int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { 1063 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1064 MBBVector ExitingMBBs; 1065 LoopRep->getExitingBlocks(ExitingMBBs); 1066 assert(!ExitingMBBs.empty() && "Infinite Loop not supported"); 1067 DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";); 1068 // We assume a single ExitBlk 1069 MBBVector ExitBlks; 1070 LoopRep->getExitBlocks(ExitBlks); 1071 SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet; 1072 for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i) 1073 ExitBlkSet.insert(ExitBlks[i]); 1074 assert(ExitBlkSet.size() == 1); 1075 MachineBasicBlock *ExitBlk = *ExitBlks.begin(); 1076 assert(ExitBlk && "Loop has several exit block"); 1077 MBBVector LatchBlks; 1078 for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader)) 1079 if (LoopRep->contains(LB)) 1080 LatchBlks.push_back(LB); 1081 1082 for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i) 1083 mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk); 1084 for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i) 1085 settleLoopcontBlock(LatchBlks[i], LoopHeader); 1086 int Match = 0; 1087 do { 1088 Match = 0; 1089 Match += serialPatternMatch(LoopHeader); 1090 Match += ifPatternMatch(LoopHeader); 1091 } while (Match > 0); 1092 mergeLooplandBlock(LoopHeader, ExitBlk); 1093 MachineLoop *ParentLoop = LoopRep->getParentLoop(); 1094 if (ParentLoop) 1095 MLI->changeLoopFor(LoopHeader, ParentLoop); 1096 else 1097 MLI->removeBlock(LoopHeader); 1098 Visited[LoopRep] = true; 1099 return 1; 1100 } 1101 1102 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak( 1103 MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { 1104 if (Src1MBB->succ_size() == 0) { 1105 MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); 1106 if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { 1107 MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; 1108 if (TheEntry) { 1109 DEBUG( 1110 dbgs() << "isLoopContBreakBlock yes src1 = BB" 1111 << Src1MBB->getNumber() 1112 << " src2 = BB" << Src2MBB->getNumber() << "\n"; 1113 ); 1114 return true; 1115 } 1116 } 1117 } 1118 return false; 1119 } 1120 1121 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, 1122 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1123 int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); 1124 if (Num == 0) { 1125 DEBUG( 1126 dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; 1127 ); 1128 Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); 1129 } 1130 return Num; 1131 } 1132 1133 int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 1134 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1135 int Num = 0; 1136 MachineBasicBlock *DownBlk; 1137 1138 //trueBlk could be the common post dominator 1139 DownBlk = TrueMBB; 1140 1141 DEBUG( 1142 dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() 1143 << " true = BB" << TrueMBB->getNumber() 1144 << ", numSucc=" << TrueMBB->succ_size() 1145 << " false = BB" << FalseMBB->getNumber() << "\n"; 1146 ); 1147 1148 while (DownBlk) { 1149 DEBUG( 1150 dbgs() << "check down = BB" << DownBlk->getNumber(); 1151 ); 1152 1153 if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { 1154 DEBUG( 1155 dbgs() << " working\n"; 1156 ); 1157 1158 Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); 1159 Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); 1160 1161 numClonedBlock += Num; 1162 Num += serialPatternMatch(*HeadMBB->succ_begin()); 1163 Num += serialPatternMatch(*std::next(HeadMBB->succ_begin())); 1164 Num += ifPatternMatch(HeadMBB); 1165 assert(Num > 0); 1166 1167 break; 1168 } 1169 DEBUG( 1170 dbgs() << " not working\n"; 1171 ); 1172 DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; 1173 } // walk down the postDomTree 1174 1175 return Num; 1176 } 1177 1178 #ifndef NDEBUG 1179 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf( 1180 MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, 1181 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { 1182 dbgs() << "head = BB" << HeadMBB->getNumber() 1183 << " size = " << HeadMBB->size(); 1184 if (Detail) { 1185 dbgs() << "\n"; 1186 HeadMBB->print(dbgs()); 1187 dbgs() << "\n"; 1188 } 1189 1190 if (TrueMBB) { 1191 dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " 1192 << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); 1193 if (Detail) { 1194 dbgs() << "\n"; 1195 TrueMBB->print(dbgs()); 1196 dbgs() << "\n"; 1197 } 1198 } 1199 if (FalseMBB) { 1200 dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " 1201 << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); 1202 if (Detail) { 1203 dbgs() << "\n"; 1204 FalseMBB->print(dbgs()); 1205 dbgs() << "\n"; 1206 } 1207 } 1208 if (LandMBB) { 1209 dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " 1210 << LandMBB->size() << " numPred = " << LandMBB->pred_size(); 1211 if (Detail) { 1212 dbgs() << "\n"; 1213 LandMBB->print(dbgs()); 1214 dbgs() << "\n"; 1215 } 1216 } 1217 1218 dbgs() << "\n"; 1219 } 1220 #endif 1221 1222 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 1223 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 1224 MachineBasicBlock **LandMBBPtr) { 1225 bool MigrateTrue = false; 1226 bool MigrateFalse = false; 1227 1228 MachineBasicBlock *LandBlk = *LandMBBPtr; 1229 1230 assert((!TrueMBB || TrueMBB->succ_size() <= 1) 1231 && (!FalseMBB || FalseMBB->succ_size() <= 1)); 1232 1233 if (TrueMBB == FalseMBB) 1234 return 0; 1235 1236 MigrateTrue = needMigrateBlock(TrueMBB); 1237 MigrateFalse = needMigrateBlock(FalseMBB); 1238 1239 if (!MigrateTrue && !MigrateFalse) 1240 return 0; 1241 1242 // If we need to migrate either trueBlk and falseBlk, migrate the rest that 1243 // have more than one predecessors. without doing this, its predecessor 1244 // rather than headBlk will have undefined value in initReg. 1245 if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) 1246 MigrateTrue = true; 1247 if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) 1248 MigrateFalse = true; 1249 1250 DEBUG( 1251 dbgs() << "before improveSimpleJumpintoIf: "; 1252 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); 1253 ); 1254 1255 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk 1256 // 1257 // new: headBlk => if () {initReg = 1; org trueBlk branch} else 1258 // {initReg = 0; org falseBlk branch } 1259 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} 1260 // => org landBlk 1261 // if landBlk->pred_size() > 2, put the about if-else inside 1262 // if (initReg !=2) {...} 1263 // 1264 // add initReg = initVal to headBlk 1265 1266 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1267 if (!MigrateTrue || !MigrateFalse) { 1268 // XXX: We have an opportunity here to optimize the "branch into if" case 1269 // here. Branch into if looks like this: 1270 // entry 1271 // / | 1272 // diamond_head branch_from 1273 // / \ | 1274 // diamond_false diamond_true 1275 // \ / 1276 // done 1277 // 1278 // The diamond_head block begins the "if" and the diamond_true block 1279 // is the block being "branched into". 1280 // 1281 // If MigrateTrue is true, then TrueBB is the block being "branched into" 1282 // and if MigrateFalse is true, then FalseBB is the block being 1283 // "branched into" 1284 // 1285 // Here is the pseudo code for how I think the optimization should work: 1286 // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. 1287 // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. 1288 // 3. Move the branch instruction from diamond_head into its own basic 1289 // block (new_block). 1290 // 4. Add an unconditional branch from diamond_head to new_block 1291 // 5. Replace the branch instruction in branch_from with an unconditional 1292 // branch to new_block. If branch_from has multiple predecessors, then 1293 // we need to replace the True/False block in the branch 1294 // instruction instead of replacing it. 1295 // 6. Change the condition of the branch instruction in new_block from 1296 // COND to (COND || GPR0) 1297 // 1298 // In order insert these MOV instruction, we will need to use the 1299 // RegisterScavenger. Usually liveness stops being tracked during 1300 // the late machine optimization passes, however if we implement 1301 // bool TargetRegisterInfo::requiresRegisterScavenging( 1302 // const MachineFunction &MF) 1303 // and have it return true, liveness will be tracked correctly 1304 // by generic optimization passes. We will also need to make sure that 1305 // all of our target-specific passes that run after regalloc and before 1306 // the CFGStructurizer track liveness and we will need to modify this pass 1307 // to correctly track liveness. 1308 // 1309 // After the above changes, the new CFG should look like this: 1310 // entry 1311 // / | 1312 // diamond_head branch_from 1313 // \ / 1314 // new_block 1315 // / | 1316 // diamond_false diamond_true 1317 // \ / 1318 // done 1319 // 1320 // Without this optimization, we are forced to duplicate the diamond_true 1321 // block and we will end up with a CFG like this: 1322 // 1323 // entry 1324 // / | 1325 // diamond_head branch_from 1326 // / \ | 1327 // diamond_false diamond_true diamond_true (duplicate) 1328 // \ / | 1329 // done --------------------| 1330 // 1331 // Duplicating diamond_true can be very costly especially if it has a 1332 // lot of instructions. 1333 return 0; 1334 } 1335 1336 int NumNewBlk = 0; 1337 1338 bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); 1339 1340 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" 1341 MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF); 1342 1343 if (LandBlkHasOtherPred) { 1344 report_fatal_error("Extra register needed to handle CFG"); 1345 unsigned CmpResReg = 1346 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1347 report_fatal_error("Extra compare instruction needed to handle CFG"); 1348 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, 1349 CmpResReg, DebugLoc()); 1350 } 1351 1352 // XXX: We are running this after RA, so creating virtual registers will 1353 // cause an assertion failure in the PostRA scheduling pass. 1354 unsigned InitReg = 1355 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1356 insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg, 1357 DebugLoc()); 1358 1359 if (MigrateTrue) { 1360 migrateInstruction(TrueMBB, LandBlk, I); 1361 // need to uncondionally insert the assignment to ensure a path from its 1362 // predecessor rather than headBlk has valid value in initReg if 1363 // (initVal != 1). 1364 report_fatal_error("Extra register needed to handle CFG"); 1365 } 1366 insertInstrBefore(I, AMDGPU::ELSE); 1367 1368 if (MigrateFalse) { 1369 migrateInstruction(FalseMBB, LandBlk, I); 1370 // need to uncondionally insert the assignment to ensure a path from its 1371 // predecessor rather than headBlk has valid value in initReg if 1372 // (initVal != 0) 1373 report_fatal_error("Extra register needed to handle CFG"); 1374 } 1375 1376 if (LandBlkHasOtherPred) { 1377 // add endif 1378 insertInstrBefore(I, AMDGPU::ENDIF); 1379 1380 // put initReg = 2 to other predecessors of landBlk 1381 for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), 1382 PE = LandBlk->pred_end(); PI != PE; ++PI) { 1383 MachineBasicBlock *MBB = *PI; 1384 if (MBB != TrueMBB && MBB != FalseMBB) 1385 report_fatal_error("Extra register needed to handle CFG"); 1386 } 1387 } 1388 DEBUG( 1389 dbgs() << "result from improveSimpleJumpintoIf: "; 1390 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0); 1391 ); 1392 1393 // update landBlk 1394 *LandMBBPtr = LandBlk; 1395 1396 return NumNewBlk; 1397 } 1398 1399 void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, 1400 MachineBasicBlock *SrcMBB) { 1401 DEBUG( 1402 dbgs() << "serialPattern BB" << DstMBB->getNumber() 1403 << " <= BB" << SrcMBB->getNumber() << "\n"; 1404 ); 1405 DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); 1406 1407 DstMBB->removeSuccessor(SrcMBB, true); 1408 cloneSuccessorList(DstMBB, SrcMBB); 1409 1410 removeSuccessor(SrcMBB); 1411 MLI->removeBlock(SrcMBB); 1412 retireBlock(SrcMBB); 1413 } 1414 1415 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, 1416 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 1417 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { 1418 assert (TrueMBB); 1419 DEBUG( 1420 dbgs() << "ifPattern BB" << MBB->getNumber(); 1421 dbgs() << "{ "; 1422 if (TrueMBB) { 1423 dbgs() << "BB" << TrueMBB->getNumber(); 1424 } 1425 dbgs() << " } else "; 1426 dbgs() << "{ "; 1427 if (FalseMBB) { 1428 dbgs() << "BB" << FalseMBB->getNumber(); 1429 } 1430 dbgs() << " }\n "; 1431 dbgs() << "landBlock: "; 1432 if (!LandMBB) { 1433 dbgs() << "NULL"; 1434 } else { 1435 dbgs() << "BB" << LandMBB->getNumber(); 1436 } 1437 dbgs() << "\n"; 1438 ); 1439 1440 int OldOpcode = BranchMI->getOpcode(); 1441 DebugLoc BranchDL = BranchMI->getDebugLoc(); 1442 1443 // transform to 1444 // if cond 1445 // trueBlk 1446 // else 1447 // falseBlk 1448 // endif 1449 // landBlk 1450 1451 MachineBasicBlock::iterator I = BranchMI; 1452 insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), 1453 BranchDL); 1454 1455 if (TrueMBB) { 1456 MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); 1457 MBB->removeSuccessor(TrueMBB, true); 1458 if (LandMBB && TrueMBB->succ_size()!=0) 1459 TrueMBB->removeSuccessor(LandMBB, true); 1460 retireBlock(TrueMBB); 1461 MLI->removeBlock(TrueMBB); 1462 } 1463 1464 if (FalseMBB) { 1465 insertInstrBefore(I, AMDGPU::ELSE); 1466 MBB->splice(I, FalseMBB, FalseMBB->begin(), 1467 FalseMBB->end()); 1468 MBB->removeSuccessor(FalseMBB, true); 1469 if (LandMBB && FalseMBB->succ_size() != 0) 1470 FalseMBB->removeSuccessor(LandMBB, true); 1471 retireBlock(FalseMBB); 1472 MLI->removeBlock(FalseMBB); 1473 } 1474 insertInstrBefore(I, AMDGPU::ENDIF); 1475 1476 BranchMI->eraseFromParent(); 1477 1478 if (LandMBB && TrueMBB && FalseMBB) 1479 MBB->addSuccessor(LandMBB); 1480 } 1481 1482 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, 1483 MachineBasicBlock *LandMBB) { 1484 DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() 1485 << " land = BB" << LandMBB->getNumber() << "\n";); 1486 1487 insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc()); 1488 insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc()); 1489 DstBlk->replaceSuccessor(DstBlk, LandMBB); 1490 } 1491 1492 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 1493 MachineBasicBlock *LandMBB) { 1494 DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber() 1495 << " land = BB" << LandMBB->getNumber() << "\n";); 1496 MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); 1497 assert(BranchMI && isCondBranch(BranchMI)); 1498 DebugLoc DL = BranchMI->getDebugLoc(); 1499 MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); 1500 MachineBasicBlock::iterator I = BranchMI; 1501 if (TrueBranch != LandMBB) 1502 reversePredicateSetter(I, *I->getParent()); 1503 insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL); 1504 insertInstrBefore(I, AMDGPU::BREAK); 1505 insertInstrBefore(I, AMDGPU::ENDIF); 1506 //now branchInst can be erase safely 1507 BranchMI->eraseFromParent(); 1508 //now take care of successors, retire blocks 1509 ExitingMBB->removeSuccessor(LandMBB, true); 1510 } 1511 1512 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, 1513 MachineBasicBlock *ContMBB) { 1514 DEBUG(dbgs() << "settleLoopcontBlock conting = BB" 1515 << ContingMBB->getNumber() 1516 << ", cont = BB" << ContMBB->getNumber() << "\n";); 1517 1518 MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); 1519 if (MI) { 1520 assert(isCondBranch(MI)); 1521 MachineBasicBlock::iterator I = MI; 1522 MachineBasicBlock *TrueBranch = getTrueBranch(MI); 1523 int OldOpcode = MI->getOpcode(); 1524 DebugLoc DL = MI->getDebugLoc(); 1525 1526 bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); 1527 1528 if (!UseContinueLogical) { 1529 int BranchOpcode = 1530 TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : 1531 getBranchZeroOpcode(OldOpcode); 1532 insertCondBranchBefore(I, BranchOpcode, DL); 1533 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1534 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL); 1535 insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL); 1536 } else { 1537 int BranchOpcode = 1538 TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : 1539 getContinueZeroOpcode(OldOpcode); 1540 insertCondBranchBefore(I, BranchOpcode, DL); 1541 } 1542 1543 MI->eraseFromParent(); 1544 } else { 1545 // if we've arrived here then we've already erased the branch instruction 1546 // travel back up the basic block to see the last reference of our debug 1547 // location we've just inserted that reference here so it should be 1548 // representative insertEnd to ensure phi-moves, if exist, go before the 1549 // continue-instr. 1550 insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, 1551 getLastDebugLocInBB(ContingMBB)); 1552 } 1553 } 1554 1555 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 1556 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { 1557 int Cloned = 0; 1558 assert(PreMBB->isSuccessor(SrcMBB)); 1559 while (SrcMBB && SrcMBB != DstMBB) { 1560 assert(SrcMBB->succ_size() == 1); 1561 if (SrcMBB->pred_size() > 1) { 1562 SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); 1563 ++Cloned; 1564 } 1565 1566 PreMBB = SrcMBB; 1567 SrcMBB = *SrcMBB->succ_begin(); 1568 } 1569 1570 return Cloned; 1571 } 1572 1573 MachineBasicBlock * 1574 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, 1575 MachineBasicBlock *PredMBB) { 1576 assert(PredMBB->isSuccessor(MBB) && 1577 "succBlk is not a prececessor of curBlk"); 1578 1579 MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions 1580 replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); 1581 //srcBlk, oldBlk, newBlk 1582 1583 PredMBB->replaceSuccessor(MBB, CloneMBB); 1584 1585 // add all successor to cloneBlk 1586 cloneSuccessorList(CloneMBB, MBB); 1587 1588 numClonedInstr += MBB->size(); 1589 1590 DEBUG( 1591 dbgs() << "Cloned block: " << "BB" 1592 << MBB->getNumber() << "size " << MBB->size() << "\n"; 1593 ); 1594 1595 SHOWNEWBLK(CloneMBB, "result of Cloned block: "); 1596 1597 return CloneMBB; 1598 } 1599 1600 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, 1601 MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { 1602 MachineBasicBlock::iterator SpliceEnd; 1603 //look for the input branchinstr, not the AMDGPU branchinstr 1604 MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); 1605 if (!BranchMI) { 1606 DEBUG( 1607 dbgs() << "migrateInstruction don't see branch instr\n"; 1608 ); 1609 SpliceEnd = SrcMBB->end(); 1610 } else { 1611 DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI); 1612 SpliceEnd = BranchMI; 1613 } 1614 DEBUG( 1615 dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size() 1616 << "srcSize = " << SrcMBB->size() << "\n"; 1617 ); 1618 1619 //splice insert before insertPos 1620 DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); 1621 1622 DEBUG( 1623 dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size() 1624 << "srcSize = " << SrcMBB->size() << '\n'; 1625 ); 1626 } 1627 1628 MachineBasicBlock * 1629 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { 1630 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1631 MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); 1632 1633 if (!LoopHeader || !LoopLatch) 1634 return nullptr; 1635 MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); 1636 // Is LoopRep an infinite loop ? 1637 if (!BranchMI || !isUncondBranch(BranchMI)) 1638 return nullptr; 1639 1640 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1641 FuncRep->push_back(DummyExitBlk); //insert to function 1642 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); 1643 DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); 1644 LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext(); 1645 Ctx.emitError("Extra register needed to handle CFG"); 1646 return nullptr; 1647 } 1648 1649 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { 1650 MachineInstr *BranchMI; 1651 1652 // I saw two unconditional branch in one basic block in example 1653 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 1654 while ((BranchMI = getLoopendBlockBranchInstr(MBB)) 1655 && isUncondBranch(BranchMI)) { 1656 DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); 1657 BranchMI->eraseFromParent(); 1658 } 1659 } 1660 1661 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch( 1662 MachineBasicBlock *MBB) { 1663 if (MBB->succ_size() != 2) 1664 return; 1665 MachineBasicBlock *MBB1 = *MBB->succ_begin(); 1666 MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin()); 1667 if (MBB1 != MBB2) 1668 return; 1669 1670 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 1671 assert(BranchMI && isCondBranch(BranchMI)); 1672 DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); 1673 BranchMI->eraseFromParent(); 1674 SHOWNEWBLK(MBB1, "Removing redundant successor"); 1675 MBB->removeSuccessor(MBB1, true); 1676 } 1677 1678 void AMDGPUCFGStructurizer::addDummyExitBlock( 1679 SmallVectorImpl<MachineBasicBlock*> &RetMBB) { 1680 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1681 FuncRep->push_back(DummyExitBlk); //insert to function 1682 insertInstrEnd(DummyExitBlk, AMDGPU::RETURN); 1683 1684 for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(), 1685 E = RetMBB.end(); It != E; ++It) { 1686 MachineBasicBlock *MBB = *It; 1687 MachineInstr *MI = getReturnInstr(MBB); 1688 if (MI) 1689 MI->eraseFromParent(); 1690 MBB->addSuccessor(DummyExitBlk); 1691 DEBUG( 1692 dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() 1693 << " successors\n"; 1694 ); 1695 } 1696 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); 1697 } 1698 1699 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { 1700 while (MBB->succ_size()) 1701 MBB->removeSuccessor(*MBB->succ_begin()); 1702 } 1703 1704 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, 1705 int SccNum) { 1706 BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; 1707 if (!srcBlkInfo) 1708 srcBlkInfo = new BlockInformation(); 1709 srcBlkInfo->SccNum = SccNum; 1710 } 1711 1712 void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { 1713 DEBUG( 1714 dbgs() << "Retiring BB" << MBB->getNumber() << "\n"; 1715 ); 1716 1717 BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; 1718 1719 if (!SrcBlkInfo) 1720 SrcBlkInfo = new BlockInformation(); 1721 1722 SrcBlkInfo->IsRetired = true; 1723 assert(MBB->succ_size() == 0 && MBB->pred_size() == 0 1724 && "can't retire block yet"); 1725 } 1726 1727 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", 1728 "AMDGPU CFG Structurizer", false, false) 1729 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 1730 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) 1731 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 1732 INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer", 1733 "AMDGPU CFG Structurizer", false, false) 1734 1735 FunctionPass *llvm::createAMDGPUCFGStructurizerPass() { 1736 return new AMDGPUCFGStructurizer(); 1737 } 1738