1 //===- AMDILCFGStructurizer.cpp - CFG Structurizer ------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //==-----------------------------------------------------------------------===// 8 9 #include "AMDGPU.h" 10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 11 #include "R600RegisterInfo.h" 12 #include "R600Subtarget.h" 13 #include "llvm/ADT/SCCIterator.h" 14 #include "llvm/ADT/Statistic.h" 15 #include "llvm/CodeGen/MachineFunction.h" 16 #include "llvm/CodeGen/MachineFunctionPass.h" 17 #include "llvm/CodeGen/MachineJumpTableInfo.h" 18 #include "llvm/CodeGen/MachineLoopInfo.h" 19 #include "llvm/CodeGen/MachinePostDominators.h" 20 #include "llvm/InitializePasses.h" 21 22 using namespace llvm; 23 24 #define DEBUG_TYPE "structcfg" 25 26 #define DEFAULT_VEC_SLOTS 8 27 28 // TODO: move-begin. 29 30 //===----------------------------------------------------------------------===// 31 // 32 // Statistics for CFGStructurizer. 33 // 34 //===----------------------------------------------------------------------===// 35 36 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " 37 "matched"); 38 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " 39 "matched"); 40 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); 41 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); 42 43 namespace llvm { 44 45 void initializeAMDGPUCFGStructurizerPass(PassRegistry &); 46 47 } // end namespace llvm 48 49 namespace { 50 51 //===----------------------------------------------------------------------===// 52 // 53 // Miscellaneous utility for CFGStructurizer. 54 // 55 //===----------------------------------------------------------------------===// 56 57 #define SHOWNEWINSTR(i) LLVM_DEBUG(dbgs() << "New instr: " << *i << "\n"); 58 59 #define SHOWNEWBLK(b, msg) \ 60 LLVM_DEBUG(dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 61 dbgs() << "\n";); 62 63 #define SHOWBLK_DETAIL(b, msg) \ 64 LLVM_DEBUG(if (b) { \ 65 dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 66 b->print(dbgs()); \ 67 dbgs() << "\n"; \ 68 }); 69 70 #define INVALIDSCCNUM -1 71 72 //===----------------------------------------------------------------------===// 73 // 74 // supporting data structure for CFGStructurizer 75 // 76 //===----------------------------------------------------------------------===// 77 78 class BlockInformation { 79 public: 80 bool IsRetired = false; 81 int SccNum = INVALIDSCCNUM; 82 83 BlockInformation() = default; 84 }; 85 86 //===----------------------------------------------------------------------===// 87 // 88 // CFGStructurizer 89 // 90 //===----------------------------------------------------------------------===// 91 92 class AMDGPUCFGStructurizer : public MachineFunctionPass { 93 public: 94 using MBBVector = SmallVector<MachineBasicBlock *, 32>; 95 using MBBInfoMap = std::map<MachineBasicBlock *, BlockInformation *>; 96 using LoopLandInfoMap = std::map<MachineLoop *, MachineBasicBlock *>; 97 98 enum PathToKind { 99 Not_SinglePath = 0, 100 SinglePath_InPath = 1, 101 SinglePath_NotInPath = 2 102 }; 103 104 static char ID; 105 106 AMDGPUCFGStructurizer() : MachineFunctionPass(ID) { 107 initializeAMDGPUCFGStructurizerPass(*PassRegistry::getPassRegistry()); 108 } 109 110 StringRef getPassName() const override { 111 return "AMDGPU Control Flow Graph structurizer Pass"; 112 } 113 114 void getAnalysisUsage(AnalysisUsage &AU) const override { 115 AU.addRequired<MachineDominatorTree>(); 116 AU.addRequired<MachinePostDominatorTree>(); 117 AU.addRequired<MachineLoopInfo>(); 118 MachineFunctionPass::getAnalysisUsage(AU); 119 } 120 121 /// Perform the CFG structurization 122 bool run(); 123 124 /// Perform the CFG preparation 125 /// This step will remove every unconditionnal/dead jump instructions and make 126 /// sure all loops have an exit block 127 bool prepare(); 128 129 bool runOnMachineFunction(MachineFunction &MF) override { 130 TII = MF.getSubtarget<R600Subtarget>().getInstrInfo(); 131 TRI = &TII->getRegisterInfo(); 132 LLVM_DEBUG(MF.dump();); 133 OrderedBlks.clear(); 134 Visited.clear(); 135 FuncRep = &MF; 136 MLI = &getAnalysis<MachineLoopInfo>(); 137 LLVM_DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI);); 138 MDT = &getAnalysis<MachineDominatorTree>(); 139 LLVM_DEBUG(MDT->print(dbgs(), (const Module *)nullptr);); 140 PDT = &getAnalysis<MachinePostDominatorTree>(); 141 LLVM_DEBUG(PDT->print(dbgs());); 142 prepare(); 143 run(); 144 LLVM_DEBUG(MF.dump();); 145 return true; 146 } 147 148 protected: 149 MachineDominatorTree *MDT; 150 MachinePostDominatorTree *PDT; 151 MachineLoopInfo *MLI; 152 const R600InstrInfo *TII = nullptr; 153 const R600RegisterInfo *TRI = nullptr; 154 155 // PRINT FUNCTIONS 156 /// Print the ordered Blocks. 157 void printOrderedBlocks() const { 158 size_t i = 0; 159 for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(), 160 iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) { 161 dbgs() << "BB" << (*iterBlk)->getNumber(); 162 dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; 163 if (i != 0 && i % 10 == 0) { 164 dbgs() << "\n"; 165 } else { 166 dbgs() << " "; 167 } 168 } 169 } 170 171 static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) { 172 for (MachineLoop::iterator iter = LoopInfo.begin(), 173 iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) { 174 (*iter)->print(dbgs(), 0); 175 } 176 } 177 178 // UTILITY FUNCTIONS 179 int getSCCNum(MachineBasicBlock *MBB) const; 180 MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const; 181 bool hasBackEdge(MachineBasicBlock *MBB) const; 182 bool isRetiredBlock(MachineBasicBlock *MBB) const; 183 bool isActiveLoophead(MachineBasicBlock *MBB) const; 184 PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 185 bool AllowSideEntry = true) const; 186 int countActiveBlock(MBBVector::const_iterator It, 187 MBBVector::const_iterator E) const; 188 bool needMigrateBlock(MachineBasicBlock *MBB) const; 189 190 // Utility Functions 191 void reversePredicateSetter(MachineBasicBlock::iterator I, 192 MachineBasicBlock &MBB); 193 /// Compute the reversed DFS post order of Blocks 194 void orderBlocks(MachineFunction *MF); 195 196 // Function originally from CFGStructTraits 197 void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode, 198 const DebugLoc &DL = DebugLoc()); 199 MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode, 200 const DebugLoc &DL = DebugLoc()); 201 MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode); 202 void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode, 203 const DebugLoc &DL); 204 void insertCondBranchBefore(MachineBasicBlock *MBB, 205 MachineBasicBlock::iterator I, int NewOpcode, 206 int RegNum, const DebugLoc &DL); 207 208 static int getBranchNzeroOpcode(int OldOpcode); 209 static int getBranchZeroOpcode(int OldOpcode); 210 static int getContinueNzeroOpcode(int OldOpcode); 211 static int getContinueZeroOpcode(int OldOpcode); 212 static MachineBasicBlock *getTrueBranch(MachineInstr *MI); 213 static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB); 214 static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB, 215 MachineInstr *MI); 216 static bool isCondBranch(MachineInstr *MI); 217 static bool isUncondBranch(MachineInstr *MI); 218 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB); 219 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB); 220 221 /// The correct naming for this is getPossibleLoopendBlockBranchInstr. 222 /// 223 /// BB with backward-edge could have move instructions after the branch 224 /// instruction. Such move instruction "belong to" the loop backward-edge. 225 MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB); 226 227 static MachineInstr *getReturnInstr(MachineBasicBlock *MBB); 228 static bool isReturnBlock(MachineBasicBlock *MBB); 229 static void cloneSuccessorList(MachineBasicBlock *DstMBB, 230 MachineBasicBlock *SrcMBB); 231 static MachineBasicBlock *clone(MachineBasicBlock *MBB); 232 233 /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose 234 /// because the AMDGPU instruction is not recognized as terminator fix this 235 /// and retire this routine 236 void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB, 237 MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk); 238 239 static void wrapup(MachineBasicBlock *MBB); 240 241 int patternMatch(MachineBasicBlock *MBB); 242 int patternMatchGroup(MachineBasicBlock *MBB); 243 int serialPatternMatch(MachineBasicBlock *MBB); 244 int ifPatternMatch(MachineBasicBlock *MBB); 245 int loopendPatternMatch(); 246 int mergeLoop(MachineLoop *LoopRep); 247 248 /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in 249 /// the same loop with LoopLandInfo without explicitly keeping track of 250 /// loopContBlks and loopBreakBlks, this is a method to get the information. 251 bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB, 252 MachineBasicBlock *Src2MBB); 253 int handleJumpintoIf(MachineBasicBlock *HeadMBB, 254 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 255 int handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 256 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB); 257 int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 258 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 259 MachineBasicBlock **LandMBBPtr); 260 void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 261 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 262 MachineBasicBlock *LandMBB, bool Detail = false); 263 int cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 264 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB); 265 void mergeSerialBlock(MachineBasicBlock *DstMBB, 266 MachineBasicBlock *SrcMBB); 267 268 void mergeIfthenelseBlock(MachineInstr *BranchMI, 269 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 270 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB); 271 void mergeLooplandBlock(MachineBasicBlock *DstMBB, 272 MachineBasicBlock *LandMBB); 273 void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 274 MachineBasicBlock *LandMBB); 275 void settleLoopcontBlock(MachineBasicBlock *ContingMBB, 276 MachineBasicBlock *ContMBB); 277 278 /// normalizeInfiniteLoopExit change 279 /// B1: 280 /// uncond_br LoopHeader 281 /// 282 /// to 283 /// B1: 284 /// cond_br 1 LoopHeader dummyExit 285 /// and return the newly added dummy exit block 286 MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep); 287 void removeUnconditionalBranch(MachineBasicBlock *MBB); 288 289 /// Remove duplicate branches instructions in a block. 290 /// For instance 291 /// B0: 292 /// cond_br X B1 B2 293 /// cond_br X B1 B2 294 /// is transformed to 295 /// B0: 296 /// cond_br X B1 B2 297 void removeRedundantConditionalBranch(MachineBasicBlock *MBB); 298 299 void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB); 300 void removeSuccessor(MachineBasicBlock *MBB); 301 MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB, 302 MachineBasicBlock *PredMBB); 303 void migrateInstruction(MachineBasicBlock *SrcMBB, 304 MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I); 305 void recordSccnum(MachineBasicBlock *MBB, int SCCNum); 306 void retireBlock(MachineBasicBlock *MBB); 307 308 private: 309 MBBInfoMap BlockInfoMap; 310 LoopLandInfoMap LLInfoMap; 311 std::map<MachineLoop *, bool> Visited; 312 MachineFunction *FuncRep; 313 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks; 314 }; 315 316 } // end anonymous namespace 317 318 char AMDGPUCFGStructurizer::ID = 0; 319 320 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const { 321 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 322 if (It == BlockInfoMap.end()) 323 return INVALIDSCCNUM; 324 return (*It).second->SccNum; 325 } 326 327 MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep) 328 const { 329 LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep); 330 if (It == LLInfoMap.end()) 331 return nullptr; 332 return (*It).second; 333 } 334 335 bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const { 336 MachineLoop *LoopRep = MLI->getLoopFor(MBB); 337 if (!LoopRep) 338 return false; 339 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 340 return MBB->isSuccessor(LoopHeader); 341 } 342 343 bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const { 344 MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB); 345 if (It == BlockInfoMap.end()) 346 return false; 347 return (*It).second->IsRetired; 348 } 349 350 bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const { 351 MachineLoop *LoopRep = MLI->getLoopFor(MBB); 352 while (LoopRep && LoopRep->getHeader() == MBB) { 353 MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep); 354 if(!LoopLand) 355 return true; 356 if (!isRetiredBlock(LoopLand)) 357 return true; 358 LoopRep = LoopRep->getParentLoop(); 359 } 360 return false; 361 } 362 363 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo( 364 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB, 365 bool AllowSideEntry) const { 366 assert(DstMBB); 367 if (SrcMBB == DstMBB) 368 return SinglePath_InPath; 369 while (SrcMBB && SrcMBB->succ_size() == 1) { 370 SrcMBB = *SrcMBB->succ_begin(); 371 if (SrcMBB == DstMBB) 372 return SinglePath_InPath; 373 if (!AllowSideEntry && SrcMBB->pred_size() > 1) 374 return Not_SinglePath; 375 } 376 if (SrcMBB && SrcMBB->succ_size()==0) 377 return SinglePath_NotInPath; 378 return Not_SinglePath; 379 } 380 381 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It, 382 MBBVector::const_iterator E) const { 383 int Count = 0; 384 while (It != E) { 385 if (!isRetiredBlock(*It)) 386 ++Count; 387 ++It; 388 } 389 return Count; 390 } 391 392 bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const { 393 unsigned BlockSizeThreshold = 30; 394 unsigned CloneInstrThreshold = 100; 395 bool MultiplePreds = MBB && (MBB->pred_size() > 1); 396 397 if(!MultiplePreds) 398 return false; 399 unsigned BlkSize = MBB->size(); 400 return ((BlkSize > BlockSizeThreshold) && 401 (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold)); 402 } 403 404 void AMDGPUCFGStructurizer::reversePredicateSetter( 405 MachineBasicBlock::iterator I, MachineBasicBlock &MBB) { 406 assert(I.isValid() && "Expected valid iterator"); 407 for (;; --I) { 408 if (I == MBB.end()) 409 continue; 410 if (I->getOpcode() == R600::PRED_X) { 411 switch (I->getOperand(2).getImm()) { 412 case R600::PRED_SETE_INT: 413 I->getOperand(2).setImm(R600::PRED_SETNE_INT); 414 return; 415 case R600::PRED_SETNE_INT: 416 I->getOperand(2).setImm(R600::PRED_SETE_INT); 417 return; 418 case R600::PRED_SETE: 419 I->getOperand(2).setImm(R600::PRED_SETNE); 420 return; 421 case R600::PRED_SETNE: 422 I->getOperand(2).setImm(R600::PRED_SETE); 423 return; 424 default: 425 llvm_unreachable("PRED_X Opcode invalid!"); 426 } 427 } 428 } 429 } 430 431 void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB, 432 int NewOpcode, const DebugLoc &DL) { 433 MachineInstr *MI = 434 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); 435 MBB->push_back(MI); 436 //assume the instruction doesn't take any reg operand ... 437 SHOWNEWINSTR(MI); 438 } 439 440 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB, 441 int NewOpcode, 442 const DebugLoc &DL) { 443 MachineInstr *MI = 444 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL); 445 if (!MBB->empty()) 446 MBB->insert(MBB->begin(), MI); 447 else 448 MBB->push_back(MI); 449 SHOWNEWINSTR(MI); 450 return MI; 451 } 452 453 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore( 454 MachineBasicBlock::iterator I, int NewOpcode) { 455 MachineInstr *OldMI = &(*I); 456 MachineBasicBlock *MBB = OldMI->getParent(); 457 MachineInstr *NewMBB = 458 MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc()); 459 MBB->insert(I, NewMBB); 460 //assume the instruction doesn't take any reg operand ... 461 SHOWNEWINSTR(NewMBB); 462 return NewMBB; 463 } 464 465 void AMDGPUCFGStructurizer::insertCondBranchBefore( 466 MachineBasicBlock::iterator I, int NewOpcode, const DebugLoc &DL) { 467 MachineInstr *OldMI = &(*I); 468 MachineBasicBlock *MBB = OldMI->getParent(); 469 MachineFunction *MF = MBB->getParent(); 470 MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 471 MBB->insert(I, NewMI); 472 MachineInstrBuilder MIB(*MF, NewMI); 473 MIB.addReg(OldMI->getOperand(1).getReg(), false); 474 SHOWNEWINSTR(NewMI); 475 //erase later oldInstr->eraseFromParent(); 476 } 477 478 void AMDGPUCFGStructurizer::insertCondBranchBefore( 479 MachineBasicBlock *blk, MachineBasicBlock::iterator I, int NewOpcode, 480 int RegNum, const DebugLoc &DL) { 481 MachineFunction *MF = blk->getParent(); 482 MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL); 483 //insert before 484 blk->insert(I, NewInstr); 485 MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false); 486 SHOWNEWINSTR(NewInstr); 487 } 488 489 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) { 490 switch(OldOpcode) { 491 case R600::JUMP_COND: 492 case R600::JUMP: return R600::IF_PREDICATE_SET; 493 case R600::BRANCH_COND_i32: 494 case R600::BRANCH_COND_f32: return R600::IF_LOGICALNZ_f32; 495 default: llvm_unreachable("internal error"); 496 } 497 return -1; 498 } 499 500 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) { 501 switch(OldOpcode) { 502 case R600::JUMP_COND: 503 case R600::JUMP: return R600::IF_PREDICATE_SET; 504 case R600::BRANCH_COND_i32: 505 case R600::BRANCH_COND_f32: return R600::IF_LOGICALZ_f32; 506 default: llvm_unreachable("internal error"); 507 } 508 return -1; 509 } 510 511 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) { 512 switch(OldOpcode) { 513 case R600::JUMP_COND: 514 case R600::JUMP: return R600::CONTINUE_LOGICALNZ_i32; 515 default: llvm_unreachable("internal error"); 516 } 517 return -1; 518 } 519 520 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) { 521 switch(OldOpcode) { 522 case R600::JUMP_COND: 523 case R600::JUMP: return R600::CONTINUE_LOGICALZ_i32; 524 default: llvm_unreachable("internal error"); 525 } 526 return -1; 527 } 528 529 MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) { 530 return MI->getOperand(0).getMBB(); 531 } 532 533 void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI, 534 MachineBasicBlock *MBB) { 535 MI->getOperand(0).setMBB(MBB); 536 } 537 538 MachineBasicBlock * 539 AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB, 540 MachineInstr *MI) { 541 assert(MBB->succ_size() == 2); 542 MachineBasicBlock *TrueBranch = getTrueBranch(MI); 543 MachineBasicBlock::succ_iterator It = MBB->succ_begin(); 544 MachineBasicBlock::succ_iterator Next = It; 545 ++Next; 546 return (*It == TrueBranch) ? *Next : *It; 547 } 548 549 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) { 550 switch (MI->getOpcode()) { 551 case R600::JUMP_COND: 552 case R600::BRANCH_COND_i32: 553 case R600::BRANCH_COND_f32: return true; 554 default: 555 return false; 556 } 557 return false; 558 } 559 560 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) { 561 switch (MI->getOpcode()) { 562 case R600::JUMP: 563 case R600::BRANCH: 564 return true; 565 default: 566 return false; 567 } 568 return false; 569 } 570 571 DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) { 572 //get DebugLoc from the first MachineBasicBlock instruction with debug info 573 DebugLoc DL; 574 for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end(); 575 ++It) { 576 MachineInstr *instr = &(*It); 577 if (instr->getDebugLoc()) 578 DL = instr->getDebugLoc(); 579 } 580 return DL; 581 } 582 583 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr( 584 MachineBasicBlock *MBB) { 585 MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 586 MachineInstr *MI = &*It; 587 if (MI && (isCondBranch(MI) || isUncondBranch(MI))) 588 return MI; 589 return nullptr; 590 } 591 592 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr( 593 MachineBasicBlock *MBB) { 594 for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend(); 595 It != E; ++It) { 596 // FIXME: Simplify 597 MachineInstr *MI = &*It; 598 if (MI) { 599 if (isCondBranch(MI) || isUncondBranch(MI)) 600 return MI; 601 else if (!TII->isMov(MI->getOpcode())) 602 break; 603 } 604 } 605 return nullptr; 606 } 607 608 MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) { 609 MachineBasicBlock::reverse_iterator It = MBB->rbegin(); 610 if (It != MBB->rend()) { 611 MachineInstr *instr = &(*It); 612 if (instr->getOpcode() == R600::RETURN) 613 return instr; 614 } 615 return nullptr; 616 } 617 618 bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) { 619 MachineInstr *MI = getReturnInstr(MBB); 620 bool IsReturn = (MBB->succ_size() == 0); 621 if (MI) 622 assert(IsReturn); 623 else if (IsReturn) 624 LLVM_DEBUG(dbgs() << "BB" << MBB->getNumber() 625 << " is return block without RETURN instr\n";); 626 return IsReturn; 627 } 628 629 void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB, 630 MachineBasicBlock *SrcMBB) { 631 for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(), 632 iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It) 633 DstMBB->addSuccessor(*It); // *iter's predecessor is also taken care of 634 } 635 636 MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) { 637 MachineFunction *Func = MBB->getParent(); 638 MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock(); 639 Func->push_back(NewMBB); //insert to function 640 for (const MachineInstr &It : *MBB) 641 NewMBB->push_back(Func->CloneMachineInstr(&It)); 642 return NewMBB; 643 } 644 645 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith( 646 MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB, 647 MachineBasicBlock *NewBlk) { 648 MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB); 649 if (BranchMI && isCondBranch(BranchMI) && 650 getTrueBranch(BranchMI) == OldMBB) 651 setTrueBranch(BranchMI, NewBlk); 652 } 653 654 void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) { 655 assert((!MBB->getParent()->getJumpTableInfo() 656 || MBB->getParent()->getJumpTableInfo()->isEmpty()) 657 && "found a jump table"); 658 659 //collect continue right before endloop 660 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr; 661 MachineBasicBlock::iterator Pre = MBB->begin(); 662 MachineBasicBlock::iterator E = MBB->end(); 663 MachineBasicBlock::iterator It = Pre; 664 while (It != E) { 665 if (Pre->getOpcode() == R600::CONTINUE 666 && It->getOpcode() == R600::ENDLOOP) 667 ContInstr.push_back(&*Pre); 668 Pre = It; 669 ++It; 670 } 671 672 //delete continue right before endloop 673 for (unsigned i = 0; i < ContInstr.size(); ++i) 674 ContInstr[i]->eraseFromParent(); 675 676 // TODO to fix up jump table so later phase won't be confused. if 677 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but 678 // there isn't such an interface yet. alternatively, replace all the other 679 // blocks in the jump table with the entryBlk //} 680 } 681 682 bool AMDGPUCFGStructurizer::prepare() { 683 bool Changed = false; 684 685 //FIXME: if not reducible flow graph, make it so ??? 686 687 LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";); 688 689 orderBlocks(FuncRep); 690 691 SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks; 692 693 // Add an ExitBlk to loop that don't have one 694 for (MachineLoopInfo::iterator It = MLI->begin(), 695 E = MLI->end(); It != E; ++It) { 696 MachineLoop *LoopRep = (*It); 697 MBBVector ExitingMBBs; 698 LoopRep->getExitingBlocks(ExitingMBBs); 699 700 if (ExitingMBBs.size() == 0) { 701 MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep); 702 if (DummyExitBlk) 703 RetBlks.push_back(DummyExitBlk); 704 } 705 } 706 707 // Remove unconditional branch instr. 708 // Add dummy exit block iff there are multiple returns. 709 for (SmallVectorImpl<MachineBasicBlock *>::const_iterator 710 It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) { 711 MachineBasicBlock *MBB = *It; 712 removeUnconditionalBranch(MBB); 713 removeRedundantConditionalBranch(MBB); 714 if (isReturnBlock(MBB)) { 715 RetBlks.push_back(MBB); 716 } 717 assert(MBB->succ_size() <= 2); 718 } 719 720 if (RetBlks.size() >= 2) { 721 addDummyExitBlock(RetBlks); 722 Changed = true; 723 } 724 725 return Changed; 726 } 727 728 bool AMDGPUCFGStructurizer::run() { 729 //Assume reducible CFG... 730 LLVM_DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n"); 731 732 #ifdef STRESSTEST 733 //Use the worse block ordering to test the algorithm. 734 ReverseVector(orderedBlks); 735 #endif 736 737 LLVM_DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks();); 738 int NumIter = 0; 739 bool Finish = false; 740 MachineBasicBlock *MBB; 741 bool MakeProgress = false; 742 int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(), 743 OrderedBlks.end()); 744 745 do { 746 ++NumIter; 747 LLVM_DEBUG(dbgs() << "numIter = " << NumIter 748 << ", numRemaintedBlk = " << NumRemainedBlk << "\n";); 749 750 SmallVectorImpl<MachineBasicBlock *>::const_iterator It = 751 OrderedBlks.begin(); 752 SmallVectorImpl<MachineBasicBlock *>::const_iterator E = 753 OrderedBlks.end(); 754 755 SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter = 756 It; 757 MachineBasicBlock *SccBeginMBB = nullptr; 758 int SccNumBlk = 0; // The number of active blocks, init to a 759 // maximum possible number. 760 int SccNumIter; // Number of iteration in this SCC. 761 762 while (It != E) { 763 MBB = *It; 764 765 if (!SccBeginMBB) { 766 SccBeginIter = It; 767 SccBeginMBB = MBB; 768 SccNumIter = 0; 769 SccNumBlk = NumRemainedBlk; // Init to maximum possible number. 770 LLVM_DEBUG(dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB); 771 dbgs() << "\n";); 772 } 773 774 if (!isRetiredBlock(MBB)) 775 patternMatch(MBB); 776 777 ++It; 778 779 bool ContNextScc = true; 780 if (It == E 781 || getSCCNum(SccBeginMBB) != getSCCNum(*It)) { 782 // Just finish one scc. 783 ++SccNumIter; 784 int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It); 785 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) { 786 LLVM_DEBUG(dbgs() << "Can't reduce SCC " << getSCCNum(MBB) 787 << ", sccNumIter = " << SccNumIter; 788 dbgs() << "doesn't make any progress\n";); 789 ContNextScc = true; 790 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) { 791 SccNumBlk = sccRemainedNumBlk; 792 It = SccBeginIter; 793 ContNextScc = false; 794 LLVM_DEBUG(dbgs() << "repeat processing SCC" << getSCCNum(MBB) 795 << "sccNumIter = " << SccNumIter << '\n';); 796 } else { 797 // Finish the current scc. 798 ContNextScc = true; 799 } 800 } else { 801 // Continue on next component in the current scc. 802 ContNextScc = false; 803 } 804 805 if (ContNextScc) 806 SccBeginMBB = nullptr; 807 } //while, "one iteration" over the function. 808 809 MachineBasicBlock *EntryMBB = 810 *GraphTraits<MachineFunction *>::nodes_begin(FuncRep); 811 if (EntryMBB->succ_size() == 0) { 812 Finish = true; 813 LLVM_DEBUG(dbgs() << "Reduce to one block\n";); 814 } else { 815 int NewnumRemainedBlk 816 = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end()); 817 // consider cloned blocks ?? 818 if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) { 819 MakeProgress = true; 820 NumRemainedBlk = NewnumRemainedBlk; 821 } else { 822 MakeProgress = false; 823 LLVM_DEBUG(dbgs() << "No progress\n";); 824 } 825 } 826 } while (!Finish && MakeProgress); 827 828 // Misc wrap up to maintain the consistency of the Function representation. 829 wrapup(*GraphTraits<MachineFunction *>::nodes_begin(FuncRep)); 830 831 // Detach retired Block, release memory. 832 for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end(); 833 It != E; ++It) { 834 if ((*It).second && (*It).second->IsRetired) { 835 assert(((*It).first)->getNumber() != -1); 836 LLVM_DEBUG(dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";); 837 (*It).first->eraseFromParent(); //Remove from the parent Function. 838 } 839 delete (*It).second; 840 } 841 BlockInfoMap.clear(); 842 LLInfoMap.clear(); 843 844 if (!Finish) { 845 LLVM_DEBUG(FuncRep->viewCFG()); 846 report_fatal_error("IRREDUCIBLE_CFG"); 847 } 848 849 return true; 850 } 851 852 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) { 853 int SccNum = 0; 854 MachineBasicBlock *MBB; 855 for (scc_iterator<MachineFunction *> It = scc_begin(MF); !It.isAtEnd(); 856 ++It, ++SccNum) { 857 const std::vector<MachineBasicBlock *> &SccNext = *It; 858 for (std::vector<MachineBasicBlock *>::const_iterator 859 blockIter = SccNext.begin(), blockEnd = SccNext.end(); 860 blockIter != blockEnd; ++blockIter) { 861 MBB = *blockIter; 862 OrderedBlks.push_back(MBB); 863 recordSccnum(MBB, SccNum); 864 } 865 } 866 867 // walk through all the block in func to check for unreachable 868 for (auto *MBB : nodes(MF)) { 869 SccNum = getSCCNum(MBB); 870 if (SccNum == INVALIDSCCNUM) 871 dbgs() << "unreachable block BB" << MBB->getNumber() << "\n"; 872 } 873 } 874 875 int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) { 876 int NumMatch = 0; 877 int CurMatch; 878 879 LLVM_DEBUG(dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";); 880 881 while ((CurMatch = patternMatchGroup(MBB)) > 0) 882 NumMatch += CurMatch; 883 884 LLVM_DEBUG(dbgs() << "End patternMatch BB" << MBB->getNumber() 885 << ", numMatch = " << NumMatch << "\n";); 886 887 return NumMatch; 888 } 889 890 int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) { 891 int NumMatch = 0; 892 NumMatch += loopendPatternMatch(); 893 NumMatch += serialPatternMatch(MBB); 894 NumMatch += ifPatternMatch(MBB); 895 return NumMatch; 896 } 897 898 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) { 899 if (MBB->succ_size() != 1) 900 return 0; 901 902 MachineBasicBlock *childBlk = *MBB->succ_begin(); 903 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) 904 return 0; 905 906 mergeSerialBlock(MBB, childBlk); 907 ++numSerialPatternMatch; 908 return 1; 909 } 910 911 int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) { 912 //two edges 913 if (MBB->succ_size() != 2) 914 return 0; 915 if (hasBackEdge(MBB)) 916 return 0; 917 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 918 if (!BranchMI) 919 return 0; 920 921 assert(isCondBranch(BranchMI)); 922 int NumMatch = 0; 923 924 MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI); 925 NumMatch += serialPatternMatch(TrueMBB); 926 NumMatch += ifPatternMatch(TrueMBB); 927 MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI); 928 NumMatch += serialPatternMatch(FalseMBB); 929 NumMatch += ifPatternMatch(FalseMBB); 930 MachineBasicBlock *LandBlk; 931 int Cloned = 0; 932 933 assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty()); 934 // TODO: Simplify 935 if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1 936 && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) { 937 // Diamond pattern 938 LandBlk = *TrueMBB->succ_begin(); 939 } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) { 940 // Triangle pattern, false is empty 941 LandBlk = FalseMBB; 942 FalseMBB = nullptr; 943 } else if (FalseMBB->succ_size() == 1 944 && *FalseMBB->succ_begin() == TrueMBB) { 945 // Triangle pattern, true is empty 946 // We reverse the predicate to make a triangle, empty false pattern; 947 std::swap(TrueMBB, FalseMBB); 948 reversePredicateSetter(MBB->end(), *MBB); 949 LandBlk = FalseMBB; 950 FalseMBB = nullptr; 951 } else if (FalseMBB->succ_size() == 1 952 && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) { 953 LandBlk = *FalseMBB->succ_begin(); 954 } else if (TrueMBB->succ_size() == 1 955 && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) { 956 LandBlk = *TrueMBB->succ_begin(); 957 } else { 958 return NumMatch + handleJumpintoIf(MBB, TrueMBB, FalseMBB); 959 } 960 961 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the 962 // new BB created for landBlk==NULL may introduce new challenge to the 963 // reduction process. 964 if (LandBlk && 965 ((TrueMBB && TrueMBB->pred_size() > 1) 966 || (FalseMBB && FalseMBB->pred_size() > 1))) { 967 Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk); 968 } 969 970 if (TrueMBB && TrueMBB->pred_size() > 1) { 971 TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB); 972 ++Cloned; 973 } 974 975 if (FalseMBB && FalseMBB->pred_size() > 1) { 976 FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB); 977 ++Cloned; 978 } 979 980 mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk); 981 982 ++numIfPatternMatch; 983 984 numClonedBlock += Cloned; 985 986 return 1 + Cloned + NumMatch; 987 } 988 989 int AMDGPUCFGStructurizer::loopendPatternMatch() { 990 std::deque<MachineLoop *> NestedLoops; 991 for (auto &It: *MLI) 992 for (MachineLoop *ML : depth_first(It)) 993 NestedLoops.push_front(ML); 994 995 if (NestedLoops.empty()) 996 return 0; 997 998 // Process nested loop outside->inside (we did push_front), 999 // so "continue" to a outside loop won't be mistaken as "break" 1000 // of the current loop. 1001 int Num = 0; 1002 for (MachineLoop *ExaminedLoop : NestedLoops) { 1003 if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop]) 1004 continue; 1005 LLVM_DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump();); 1006 int NumBreak = mergeLoop(ExaminedLoop); 1007 if (NumBreak == -1) 1008 break; 1009 Num += NumBreak; 1010 } 1011 return Num; 1012 } 1013 1014 int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) { 1015 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1016 MBBVector ExitingMBBs; 1017 LoopRep->getExitingBlocks(ExitingMBBs); 1018 assert(!ExitingMBBs.empty() && "Infinite Loop not supported"); 1019 LLVM_DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() 1020 << " exiting blocks\n";); 1021 // We assume a single ExitBlk 1022 MBBVector ExitBlks; 1023 LoopRep->getExitBlocks(ExitBlks); 1024 SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet; 1025 for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i) 1026 ExitBlkSet.insert(ExitBlks[i]); 1027 assert(ExitBlkSet.size() == 1); 1028 MachineBasicBlock *ExitBlk = *ExitBlks.begin(); 1029 assert(ExitBlk && "Loop has several exit block"); 1030 MBBVector LatchBlks; 1031 for (auto *LB : inverse_children<MachineBasicBlock*>(LoopHeader)) 1032 if (LoopRep->contains(LB)) 1033 LatchBlks.push_back(LB); 1034 1035 for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i) 1036 mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk); 1037 for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i) 1038 settleLoopcontBlock(LatchBlks[i], LoopHeader); 1039 int Match = 0; 1040 do { 1041 Match = 0; 1042 Match += serialPatternMatch(LoopHeader); 1043 Match += ifPatternMatch(LoopHeader); 1044 } while (Match > 0); 1045 mergeLooplandBlock(LoopHeader, ExitBlk); 1046 MachineLoop *ParentLoop = LoopRep->getParentLoop(); 1047 if (ParentLoop) 1048 MLI->changeLoopFor(LoopHeader, ParentLoop); 1049 else 1050 MLI->removeBlock(LoopHeader); 1051 Visited[LoopRep] = true; 1052 return 1; 1053 } 1054 1055 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak( 1056 MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) { 1057 if (Src1MBB->succ_size() == 0) { 1058 MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB); 1059 if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) { 1060 MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep]; 1061 if (TheEntry) { 1062 LLVM_DEBUG(dbgs() << "isLoopContBreakBlock yes src1 = BB" 1063 << Src1MBB->getNumber() << " src2 = BB" 1064 << Src2MBB->getNumber() << "\n";); 1065 return true; 1066 } 1067 } 1068 } 1069 return false; 1070 } 1071 1072 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB, 1073 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1074 int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB); 1075 if (Num == 0) { 1076 LLVM_DEBUG(dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" 1077 << "\n";); 1078 Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB); 1079 } 1080 return Num; 1081 } 1082 1083 int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB, 1084 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) { 1085 int Num = 0; 1086 MachineBasicBlock *DownBlk; 1087 1088 //trueBlk could be the common post dominator 1089 DownBlk = TrueMBB; 1090 1091 LLVM_DEBUG(dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber() 1092 << " true = BB" << TrueMBB->getNumber() 1093 << ", numSucc=" << TrueMBB->succ_size() << " false = BB" 1094 << FalseMBB->getNumber() << "\n";); 1095 1096 while (DownBlk) { 1097 LLVM_DEBUG(dbgs() << "check down = BB" << DownBlk->getNumber();); 1098 1099 if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) { 1100 LLVM_DEBUG(dbgs() << " working\n";); 1101 1102 Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk); 1103 Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk); 1104 1105 numClonedBlock += Num; 1106 Num += serialPatternMatch(*HeadMBB->succ_begin()); 1107 Num += serialPatternMatch(*std::next(HeadMBB->succ_begin())); 1108 Num += ifPatternMatch(HeadMBB); 1109 assert(Num > 0); 1110 1111 break; 1112 } 1113 LLVM_DEBUG(dbgs() << " not working\n";); 1114 DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : nullptr; 1115 } // walk down the postDomTree 1116 1117 return Num; 1118 } 1119 1120 #ifndef NDEBUG 1121 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf( 1122 MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB, 1123 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) { 1124 dbgs() << "head = BB" << HeadMBB->getNumber() 1125 << " size = " << HeadMBB->size(); 1126 if (Detail) { 1127 dbgs() << "\n"; 1128 HeadMBB->print(dbgs()); 1129 dbgs() << "\n"; 1130 } 1131 1132 if (TrueMBB) { 1133 dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = " 1134 << TrueMBB->size() << " numPred = " << TrueMBB->pred_size(); 1135 if (Detail) { 1136 dbgs() << "\n"; 1137 TrueMBB->print(dbgs()); 1138 dbgs() << "\n"; 1139 } 1140 } 1141 if (FalseMBB) { 1142 dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = " 1143 << FalseMBB->size() << " numPred = " << FalseMBB->pred_size(); 1144 if (Detail) { 1145 dbgs() << "\n"; 1146 FalseMBB->print(dbgs()); 1147 dbgs() << "\n"; 1148 } 1149 } 1150 if (LandMBB) { 1151 dbgs() << ", land = BB" << LandMBB->getNumber() << " size = " 1152 << LandMBB->size() << " numPred = " << LandMBB->pred_size(); 1153 if (Detail) { 1154 dbgs() << "\n"; 1155 LandMBB->print(dbgs()); 1156 dbgs() << "\n"; 1157 } 1158 } 1159 1160 dbgs() << "\n"; 1161 } 1162 #endif 1163 1164 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB, 1165 MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB, 1166 MachineBasicBlock **LandMBBPtr) { 1167 bool MigrateTrue = false; 1168 bool MigrateFalse = false; 1169 1170 MachineBasicBlock *LandBlk = *LandMBBPtr; 1171 1172 assert((!TrueMBB || TrueMBB->succ_size() <= 1) 1173 && (!FalseMBB || FalseMBB->succ_size() <= 1)); 1174 1175 if (TrueMBB == FalseMBB) 1176 return 0; 1177 1178 MigrateTrue = needMigrateBlock(TrueMBB); 1179 MigrateFalse = needMigrateBlock(FalseMBB); 1180 1181 if (!MigrateTrue && !MigrateFalse) 1182 return 0; 1183 1184 // If we need to migrate either trueBlk and falseBlk, migrate the rest that 1185 // have more than one predecessors. without doing this, its predecessor 1186 // rather than headBlk will have undefined value in initReg. 1187 if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1) 1188 MigrateTrue = true; 1189 if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1) 1190 MigrateFalse = true; 1191 1192 LLVM_DEBUG( 1193 dbgs() << "before improveSimpleJumpintoIf: "; 1194 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); 1195 1196 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk 1197 // 1198 // new: headBlk => if () {initReg = 1; org trueBlk branch} else 1199 // {initReg = 0; org falseBlk branch } 1200 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} 1201 // => org landBlk 1202 // if landBlk->pred_size() > 2, put the about if-else inside 1203 // if (initReg !=2) {...} 1204 // 1205 // add initReg = initVal to headBlk 1206 1207 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1208 if (!MigrateTrue || !MigrateFalse) { 1209 // XXX: We have an opportunity here to optimize the "branch into if" case 1210 // here. Branch into if looks like this: 1211 // entry 1212 // / | 1213 // diamond_head branch_from 1214 // / \ | 1215 // diamond_false diamond_true 1216 // \ / 1217 // done 1218 // 1219 // The diamond_head block begins the "if" and the diamond_true block 1220 // is the block being "branched into". 1221 // 1222 // If MigrateTrue is true, then TrueBB is the block being "branched into" 1223 // and if MigrateFalse is true, then FalseBB is the block being 1224 // "branched into" 1225 // 1226 // Here is the pseudo code for how I think the optimization should work: 1227 // 1. Insert MOV GPR0, 0 before the branch instruction in diamond_head. 1228 // 2. Insert MOV GPR0, 1 before the branch instruction in branch_from. 1229 // 3. Move the branch instruction from diamond_head into its own basic 1230 // block (new_block). 1231 // 4. Add an unconditional branch from diamond_head to new_block 1232 // 5. Replace the branch instruction in branch_from with an unconditional 1233 // branch to new_block. If branch_from has multiple predecessors, then 1234 // we need to replace the True/False block in the branch 1235 // instruction instead of replacing it. 1236 // 6. Change the condition of the branch instruction in new_block from 1237 // COND to (COND || GPR0) 1238 // 1239 // In order insert these MOV instruction, we will need to use the 1240 // RegisterScavenger. Usually liveness stops being tracked during 1241 // the late machine optimization passes, however if we implement 1242 // bool TargetRegisterInfo::requiresRegisterScavenging( 1243 // const MachineFunction &MF) 1244 // and have it return true, liveness will be tracked correctly 1245 // by generic optimization passes. We will also need to make sure that 1246 // all of our target-specific passes that run after regalloc and before 1247 // the CFGStructurizer track liveness and we will need to modify this pass 1248 // to correctly track liveness. 1249 // 1250 // After the above changes, the new CFG should look like this: 1251 // entry 1252 // / | 1253 // diamond_head branch_from 1254 // \ / 1255 // new_block 1256 // / | 1257 // diamond_false diamond_true 1258 // \ / 1259 // done 1260 // 1261 // Without this optimization, we are forced to duplicate the diamond_true 1262 // block and we will end up with a CFG like this: 1263 // 1264 // entry 1265 // / | 1266 // diamond_head branch_from 1267 // / \ | 1268 // diamond_false diamond_true diamond_true (duplicate) 1269 // \ / | 1270 // done --------------------| 1271 // 1272 // Duplicating diamond_true can be very costly especially if it has a 1273 // lot of instructions. 1274 return 0; 1275 } 1276 1277 int NumNewBlk = 0; 1278 1279 bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2); 1280 1281 //insert R600::ENDIF to avoid special case "input landBlk == NULL" 1282 MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, R600::ENDIF); 1283 1284 if (LandBlkHasOtherPred) { 1285 report_fatal_error("Extra register needed to handle CFG"); 1286 Register CmpResReg = 1287 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1288 report_fatal_error("Extra compare instruction needed to handle CFG"); 1289 insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, 1290 CmpResReg, DebugLoc()); 1291 } 1292 1293 // XXX: We are running this after RA, so creating virtual registers will 1294 // cause an assertion failure in the PostRA scheduling pass. 1295 Register InitReg = 1296 HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC); 1297 insertCondBranchBefore(LandBlk, I, R600::IF_PREDICATE_SET, InitReg, 1298 DebugLoc()); 1299 1300 if (MigrateTrue) { 1301 migrateInstruction(TrueMBB, LandBlk, I); 1302 // need to uncondionally insert the assignment to ensure a path from its 1303 // predecessor rather than headBlk has valid value in initReg if 1304 // (initVal != 1). 1305 report_fatal_error("Extra register needed to handle CFG"); 1306 } 1307 insertInstrBefore(I, R600::ELSE); 1308 1309 if (MigrateFalse) { 1310 migrateInstruction(FalseMBB, LandBlk, I); 1311 // need to uncondionally insert the assignment to ensure a path from its 1312 // predecessor rather than headBlk has valid value in initReg if 1313 // (initVal != 0) 1314 report_fatal_error("Extra register needed to handle CFG"); 1315 } 1316 1317 if (LandBlkHasOtherPred) { 1318 // add endif 1319 insertInstrBefore(I, R600::ENDIF); 1320 1321 // put initReg = 2 to other predecessors of landBlk 1322 for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(), 1323 PE = LandBlk->pred_end(); PI != PE; ++PI) { 1324 MachineBasicBlock *MBB = *PI; 1325 if (MBB != TrueMBB && MBB != FalseMBB) 1326 report_fatal_error("Extra register needed to handle CFG"); 1327 } 1328 } 1329 LLVM_DEBUG( 1330 dbgs() << "result from improveSimpleJumpintoIf: "; 1331 showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);); 1332 1333 // update landBlk 1334 *LandMBBPtr = LandBlk; 1335 1336 return NumNewBlk; 1337 } 1338 1339 void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB, 1340 MachineBasicBlock *SrcMBB) { 1341 LLVM_DEBUG(dbgs() << "serialPattern BB" << DstMBB->getNumber() << " <= BB" 1342 << SrcMBB->getNumber() << "\n";); 1343 DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end()); 1344 1345 DstMBB->removeSuccessor(SrcMBB, true); 1346 cloneSuccessorList(DstMBB, SrcMBB); 1347 1348 removeSuccessor(SrcMBB); 1349 MLI->removeBlock(SrcMBB); 1350 retireBlock(SrcMBB); 1351 } 1352 1353 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI, 1354 MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB, 1355 MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) { 1356 assert (TrueMBB); 1357 LLVM_DEBUG(dbgs() << "ifPattern BB" << MBB->getNumber(); dbgs() << "{ "; 1358 if (TrueMBB) { dbgs() << "BB" << TrueMBB->getNumber(); } dbgs() 1359 << " } else "; 1360 dbgs() << "{ "; if (FalseMBB) { 1361 dbgs() << "BB" << FalseMBB->getNumber(); 1362 } dbgs() << " }\n "; 1363 dbgs() << "landBlock: "; if (!LandMBB) { dbgs() << "NULL"; } else { 1364 dbgs() << "BB" << LandMBB->getNumber(); 1365 } dbgs() << "\n";); 1366 1367 int OldOpcode = BranchMI->getOpcode(); 1368 DebugLoc BranchDL = BranchMI->getDebugLoc(); 1369 1370 // transform to 1371 // if cond 1372 // trueBlk 1373 // else 1374 // falseBlk 1375 // endif 1376 // landBlk 1377 1378 MachineBasicBlock::iterator I = BranchMI; 1379 insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode), 1380 BranchDL); 1381 1382 if (TrueMBB) { 1383 MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end()); 1384 MBB->removeSuccessor(TrueMBB, true); 1385 if (LandMBB && TrueMBB->succ_size()!=0) 1386 TrueMBB->removeSuccessor(LandMBB, true); 1387 retireBlock(TrueMBB); 1388 MLI->removeBlock(TrueMBB); 1389 } 1390 1391 if (FalseMBB) { 1392 insertInstrBefore(I, R600::ELSE); 1393 MBB->splice(I, FalseMBB, FalseMBB->begin(), 1394 FalseMBB->end()); 1395 MBB->removeSuccessor(FalseMBB, true); 1396 if (LandMBB && FalseMBB->succ_size() != 0) 1397 FalseMBB->removeSuccessor(LandMBB, true); 1398 retireBlock(FalseMBB); 1399 MLI->removeBlock(FalseMBB); 1400 } 1401 insertInstrBefore(I, R600::ENDIF); 1402 1403 BranchMI->eraseFromParent(); 1404 1405 if (LandMBB && TrueMBB && FalseMBB) 1406 MBB->addSuccessor(LandMBB); 1407 } 1408 1409 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk, 1410 MachineBasicBlock *LandMBB) { 1411 LLVM_DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber() 1412 << " land = BB" << LandMBB->getNumber() << "\n";); 1413 1414 insertInstrBefore(DstBlk, R600::WHILELOOP, DebugLoc()); 1415 insertInstrEnd(DstBlk, R600::ENDLOOP, DebugLoc()); 1416 DstBlk->replaceSuccessor(DstBlk, LandMBB); 1417 } 1418 1419 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB, 1420 MachineBasicBlock *LandMBB) { 1421 LLVM_DEBUG(dbgs() << "loopbreakPattern exiting = BB" 1422 << ExitingMBB->getNumber() << " land = BB" 1423 << LandMBB->getNumber() << "\n";); 1424 MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB); 1425 assert(BranchMI && isCondBranch(BranchMI)); 1426 DebugLoc DL = BranchMI->getDebugLoc(); 1427 MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI); 1428 MachineBasicBlock::iterator I = BranchMI; 1429 if (TrueBranch != LandMBB) 1430 reversePredicateSetter(I, *I->getParent()); 1431 insertCondBranchBefore(ExitingMBB, I, R600::IF_PREDICATE_SET, R600::PREDICATE_BIT, DL); 1432 insertInstrBefore(I, R600::BREAK); 1433 insertInstrBefore(I, R600::ENDIF); 1434 //now branchInst can be erase safely 1435 BranchMI->eraseFromParent(); 1436 //now take care of successors, retire blocks 1437 ExitingMBB->removeSuccessor(LandMBB, true); 1438 } 1439 1440 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB, 1441 MachineBasicBlock *ContMBB) { 1442 LLVM_DEBUG(dbgs() << "settleLoopcontBlock conting = BB" 1443 << ContingMBB->getNumber() << ", cont = BB" 1444 << ContMBB->getNumber() << "\n";); 1445 1446 MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB); 1447 if (MI) { 1448 assert(isCondBranch(MI)); 1449 MachineBasicBlock::iterator I = MI; 1450 MachineBasicBlock *TrueBranch = getTrueBranch(MI); 1451 int OldOpcode = MI->getOpcode(); 1452 DebugLoc DL = MI->getDebugLoc(); 1453 1454 bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI); 1455 1456 if (!UseContinueLogical) { 1457 int BranchOpcode = 1458 TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) : 1459 getBranchZeroOpcode(OldOpcode); 1460 insertCondBranchBefore(I, BranchOpcode, DL); 1461 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1462 insertInstrEnd(ContingMBB, R600::CONTINUE, DL); 1463 insertInstrEnd(ContingMBB, R600::ENDIF, DL); 1464 } else { 1465 int BranchOpcode = 1466 TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) : 1467 getContinueZeroOpcode(OldOpcode); 1468 insertCondBranchBefore(I, BranchOpcode, DL); 1469 } 1470 1471 MI->eraseFromParent(); 1472 } else { 1473 // if we've arrived here then we've already erased the branch instruction 1474 // travel back up the basic block to see the last reference of our debug 1475 // location we've just inserted that reference here so it should be 1476 // representative insertEnd to ensure phi-moves, if exist, go before the 1477 // continue-instr. 1478 insertInstrEnd(ContingMBB, R600::CONTINUE, 1479 getLastDebugLocInBB(ContingMBB)); 1480 } 1481 } 1482 1483 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB, 1484 MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) { 1485 int Cloned = 0; 1486 assert(PreMBB->isSuccessor(SrcMBB)); 1487 while (SrcMBB && SrcMBB != DstMBB) { 1488 assert(SrcMBB->succ_size() == 1); 1489 if (SrcMBB->pred_size() > 1) { 1490 SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB); 1491 ++Cloned; 1492 } 1493 1494 PreMBB = SrcMBB; 1495 SrcMBB = *SrcMBB->succ_begin(); 1496 } 1497 1498 return Cloned; 1499 } 1500 1501 MachineBasicBlock * 1502 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB, 1503 MachineBasicBlock *PredMBB) { 1504 assert(PredMBB->isSuccessor(MBB) && 1505 "succBlk is not a prececessor of curBlk"); 1506 1507 MachineBasicBlock *CloneMBB = clone(MBB); //clone instructions 1508 replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB); 1509 //srcBlk, oldBlk, newBlk 1510 1511 PredMBB->replaceSuccessor(MBB, CloneMBB); 1512 1513 // add all successor to cloneBlk 1514 cloneSuccessorList(CloneMBB, MBB); 1515 1516 numClonedInstr += MBB->size(); 1517 1518 LLVM_DEBUG(dbgs() << "Cloned block: " 1519 << "BB" << MBB->getNumber() << "size " << MBB->size() 1520 << "\n";); 1521 1522 SHOWNEWBLK(CloneMBB, "result of Cloned block: "); 1523 1524 return CloneMBB; 1525 } 1526 1527 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB, 1528 MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) { 1529 MachineBasicBlock::iterator SpliceEnd; 1530 //look for the input branchinstr, not the AMDGPU branchinstr 1531 MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB); 1532 if (!BranchMI) { 1533 LLVM_DEBUG(dbgs() << "migrateInstruction don't see branch instr\n";); 1534 SpliceEnd = SrcMBB->end(); 1535 } else { 1536 LLVM_DEBUG(dbgs() << "migrateInstruction see branch instr: " << *BranchMI); 1537 SpliceEnd = BranchMI; 1538 } 1539 LLVM_DEBUG(dbgs() << "migrateInstruction before splice dstSize = " 1540 << DstMBB->size() << "srcSize = " << SrcMBB->size() 1541 << "\n";); 1542 1543 //splice insert before insertPos 1544 DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd); 1545 1546 LLVM_DEBUG(dbgs() << "migrateInstruction after splice dstSize = " 1547 << DstMBB->size() << "srcSize = " << SrcMBB->size() 1548 << '\n';); 1549 } 1550 1551 MachineBasicBlock * 1552 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) { 1553 MachineBasicBlock *LoopHeader = LoopRep->getHeader(); 1554 MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch(); 1555 1556 if (!LoopHeader || !LoopLatch) 1557 return nullptr; 1558 MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch); 1559 // Is LoopRep an infinite loop ? 1560 if (!BranchMI || !isUncondBranch(BranchMI)) 1561 return nullptr; 1562 1563 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1564 FuncRep->push_back(DummyExitBlk); //insert to function 1565 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); 1566 LLVM_DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";); 1567 LLVMContext &Ctx = LoopHeader->getParent()->getFunction().getContext(); 1568 Ctx.emitError("Extra register needed to handle CFG"); 1569 return nullptr; 1570 } 1571 1572 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) { 1573 MachineInstr *BranchMI; 1574 1575 // I saw two unconditional branch in one basic block in example 1576 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 1577 while ((BranchMI = getLoopendBlockBranchInstr(MBB)) 1578 && isUncondBranch(BranchMI)) { 1579 LLVM_DEBUG(dbgs() << "Removing uncond branch instr: " << *BranchMI); 1580 BranchMI->eraseFromParent(); 1581 } 1582 } 1583 1584 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch( 1585 MachineBasicBlock *MBB) { 1586 if (MBB->succ_size() != 2) 1587 return; 1588 MachineBasicBlock *MBB1 = *MBB->succ_begin(); 1589 MachineBasicBlock *MBB2 = *std::next(MBB->succ_begin()); 1590 if (MBB1 != MBB2) 1591 return; 1592 1593 MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB); 1594 assert(BranchMI && isCondBranch(BranchMI)); 1595 LLVM_DEBUG(dbgs() << "Removing unneeded cond branch instr: " << *BranchMI); 1596 BranchMI->eraseFromParent(); 1597 SHOWNEWBLK(MBB1, "Removing redundant successor"); 1598 MBB->removeSuccessor(MBB1, true); 1599 } 1600 1601 void AMDGPUCFGStructurizer::addDummyExitBlock( 1602 SmallVectorImpl<MachineBasicBlock*> &RetMBB) { 1603 MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock(); 1604 FuncRep->push_back(DummyExitBlk); //insert to function 1605 insertInstrEnd(DummyExitBlk, R600::RETURN); 1606 1607 for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(), 1608 E = RetMBB.end(); It != E; ++It) { 1609 MachineBasicBlock *MBB = *It; 1610 MachineInstr *MI = getReturnInstr(MBB); 1611 if (MI) 1612 MI->eraseFromParent(); 1613 MBB->addSuccessor(DummyExitBlk); 1614 LLVM_DEBUG(dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber() 1615 << " successors\n";); 1616 } 1617 SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: "); 1618 } 1619 1620 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) { 1621 while (MBB->succ_size()) 1622 MBB->removeSuccessor(*MBB->succ_begin()); 1623 } 1624 1625 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB, 1626 int SccNum) { 1627 BlockInformation *&srcBlkInfo = BlockInfoMap[MBB]; 1628 if (!srcBlkInfo) 1629 srcBlkInfo = new BlockInformation(); 1630 srcBlkInfo->SccNum = SccNum; 1631 } 1632 1633 void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) { 1634 LLVM_DEBUG(dbgs() << "Retiring BB" << MBB->getNumber() << "\n";); 1635 1636 BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB]; 1637 1638 if (!SrcBlkInfo) 1639 SrcBlkInfo = new BlockInformation(); 1640 1641 SrcBlkInfo->IsRetired = true; 1642 assert(MBB->succ_size() == 0 && MBB->pred_size() == 0 1643 && "can't retire block yet"); 1644 } 1645 1646 INITIALIZE_PASS_BEGIN(AMDGPUCFGStructurizer, "amdgpustructurizer", 1647 "AMDGPU CFG Structurizer", false, false) 1648 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 1649 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree) 1650 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 1651 INITIALIZE_PASS_END(AMDGPUCFGStructurizer, "amdgpustructurizer", 1652 "AMDGPU CFG Structurizer", false, false) 1653 1654 FunctionPass *llvm::createAMDGPUCFGStructurizerPass() { 1655 return new AMDGPUCFGStructurizer(); 1656 } 1657