1 //===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This utility class duplicates basic blocks ending in unconditional branches 11 // into the tails of their predecessors. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/TailDuplicator.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SetVector.h" 18 #include "llvm/ADT/SmallSet.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/Passes.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/ErrorHandling.h" 29 #include "llvm/Support/raw_ostream.h" 30 using namespace llvm; 31 32 #define DEBUG_TYPE "tailduplication" 33 34 STATISTIC(NumTails, "Number of tails duplicated"); 35 STATISTIC(NumTailDups, "Number of tail duplicated blocks"); 36 STATISTIC(NumInstrDups, "Additional instructions due to tail duplication"); 37 STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); 38 STATISTIC(NumAddedPHIs, "Number of phis added"); 39 40 // Heuristic for tail duplication. 41 static cl::opt<unsigned> TailDuplicateSize( 42 "tail-dup-size", 43 cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), 44 cl::Hidden); 45 46 static cl::opt<bool> 47 TailDupVerify("tail-dup-verify", 48 cl::desc("Verify sanity of PHI instructions during taildup"), 49 cl::init(false), cl::Hidden); 50 51 static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), 52 cl::Hidden); 53 54 namespace llvm { 55 56 void TailDuplicator::initMF(MachineFunction &MF, const MachineModuleInfo *MMIin, 57 const MachineBranchProbabilityInfo *MBPIin) { 58 TII = MF.getSubtarget().getInstrInfo(); 59 TRI = MF.getSubtarget().getRegisterInfo(); 60 MRI = &MF.getRegInfo(); 61 MMI = MMIin; 62 MBPI = MBPIin; 63 64 assert(MBPI != nullptr && "Machine Branch Probability Info required"); 65 66 PreRegAlloc = MRI->isSSA(); 67 RS.reset(); 68 69 if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) 70 RS.reset(new RegScavenger()); 71 } 72 73 static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { 74 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { 75 MachineBasicBlock *MBB = &*I; 76 SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(), 77 MBB->pred_end()); 78 MachineBasicBlock::iterator MI = MBB->begin(); 79 while (MI != MBB->end()) { 80 if (!MI->isPHI()) 81 break; 82 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 83 PE = Preds.end(); 84 PI != PE; ++PI) { 85 MachineBasicBlock *PredBB = *PI; 86 bool Found = false; 87 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 88 MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); 89 if (PHIBB == PredBB) { 90 Found = true; 91 break; 92 } 93 } 94 if (!Found) { 95 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 96 dbgs() << " missing input from predecessor BB#" 97 << PredBB->getNumber() << '\n'; 98 llvm_unreachable(nullptr); 99 } 100 } 101 102 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 103 MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); 104 if (CheckExtra && !Preds.count(PHIBB)) { 105 dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": " 106 << *MI; 107 dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() 108 << '\n'; 109 llvm_unreachable(nullptr); 110 } 111 if (PHIBB->getNumber() < 0) { 112 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 113 dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; 114 llvm_unreachable(nullptr); 115 } 116 } 117 ++MI; 118 } 119 } 120 } 121 122 /// Tail duplicate the block and cleanup. 123 bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple, 124 MachineBasicBlock *MBB) { 125 // Save the successors list. 126 SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(), 127 MBB->succ_end()); 128 129 SmallVector<MachineBasicBlock *, 8> TDBBs; 130 SmallVector<MachineInstr *, 16> Copies; 131 if (!tailDuplicate(MF, IsSimple, MBB, TDBBs, Copies)) 132 return false; 133 134 ++NumTails; 135 136 SmallVector<MachineInstr *, 8> NewPHIs; 137 MachineSSAUpdater SSAUpdate(MF, &NewPHIs); 138 139 // TailBB's immediate successors are now successors of those predecessors 140 // which duplicated TailBB. Add the predecessors as sources to the PHI 141 // instructions. 142 bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); 143 if (PreRegAlloc) 144 updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); 145 146 // If it is dead, remove it. 147 if (isDead) { 148 NumInstrDups -= MBB->size(); 149 removeDeadBlock(MBB); 150 ++NumDeadBlocks; 151 } 152 153 // Update SSA form. 154 if (!SSAUpdateVRs.empty()) { 155 for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { 156 unsigned VReg = SSAUpdateVRs[i]; 157 SSAUpdate.Initialize(VReg); 158 159 // If the original definition is still around, add it as an available 160 // value. 161 MachineInstr *DefMI = MRI->getVRegDef(VReg); 162 MachineBasicBlock *DefBB = nullptr; 163 if (DefMI) { 164 DefBB = DefMI->getParent(); 165 SSAUpdate.AddAvailableValue(DefBB, VReg); 166 } 167 168 // Add the new vregs as available values. 169 DenseMap<unsigned, AvailableValsTy>::iterator LI = 170 SSAUpdateVals.find(VReg); 171 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 172 MachineBasicBlock *SrcBB = LI->second[j].first; 173 unsigned SrcReg = LI->second[j].second; 174 SSAUpdate.AddAvailableValue(SrcBB, SrcReg); 175 } 176 177 // Rewrite uses that are outside of the original def's block. 178 MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); 179 while (UI != MRI->use_end()) { 180 MachineOperand &UseMO = *UI; 181 MachineInstr *UseMI = UseMO.getParent(); 182 ++UI; 183 if (UseMI->isDebugValue()) { 184 // SSAUpdate can replace the use with an undef. That creates 185 // a debug instruction that is a kill. 186 // FIXME: Should it SSAUpdate job to delete debug instructions 187 // instead of replacing the use with undef? 188 UseMI->eraseFromParent(); 189 continue; 190 } 191 if (UseMI->getParent() == DefBB && !UseMI->isPHI()) 192 continue; 193 SSAUpdate.RewriteUse(UseMO); 194 } 195 } 196 197 SSAUpdateVRs.clear(); 198 SSAUpdateVals.clear(); 199 } 200 201 // Eliminate some of the copies inserted by tail duplication to maintain 202 // SSA form. 203 for (unsigned i = 0, e = Copies.size(); i != e; ++i) { 204 MachineInstr *Copy = Copies[i]; 205 if (!Copy->isCopy()) 206 continue; 207 unsigned Dst = Copy->getOperand(0).getReg(); 208 unsigned Src = Copy->getOperand(1).getReg(); 209 if (MRI->hasOneNonDBGUse(Src) && 210 MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { 211 // Copy is the only use. Do trivial copy propagation here. 212 MRI->replaceRegWith(Dst, Src); 213 Copy->eraseFromParent(); 214 } 215 } 216 217 if (NewPHIs.size()) 218 NumAddedPHIs += NewPHIs.size(); 219 220 return true; 221 } 222 223 /// Look for small blocks that are unconditionally branched to and do not fall 224 /// through. Tail-duplicate their instructions into their predecessors to 225 /// eliminate (dynamic) branches. 226 bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) { 227 bool MadeChange = false; 228 229 if (PreRegAlloc && TailDupVerify) { 230 DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); 231 VerifyPHIs(MF, true); 232 } 233 234 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E;) { 235 MachineBasicBlock *MBB = &*I++; 236 237 if (NumTails == TailDupLimit) 238 break; 239 240 bool IsSimple = isSimpleBB(MBB); 241 242 if (!shouldTailDuplicate(MF, IsSimple, *MBB)) 243 continue; 244 245 MadeChange |= tailDuplicateAndUpdate(MF, IsSimple, MBB); 246 } 247 248 if (PreRegAlloc && TailDupVerify) 249 VerifyPHIs(MF, false); 250 251 return MadeChange; 252 } 253 254 static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, 255 const MachineRegisterInfo *MRI) { 256 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { 257 if (UseMI.isDebugValue()) 258 continue; 259 if (UseMI.getParent() != BB) 260 return true; 261 } 262 return false; 263 } 264 265 static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { 266 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) 267 if (MI->getOperand(i + 1).getMBB() == SrcBB) 268 return i; 269 return 0; 270 } 271 272 // Remember which registers are used by phis in this block. This is 273 // used to determine which registers are liveout while modifying the 274 // block (which is why we need to copy the information). 275 static void getRegsUsedByPHIs(const MachineBasicBlock &BB, 276 DenseSet<unsigned> *UsedByPhi) { 277 for (const auto &MI : BB) { 278 if (!MI.isPHI()) 279 break; 280 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 281 unsigned SrcReg = MI.getOperand(i).getReg(); 282 UsedByPhi->insert(SrcReg); 283 } 284 } 285 } 286 287 /// Add a definition and source virtual registers pair for SSA update. 288 void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 289 MachineBasicBlock *BB) { 290 DenseMap<unsigned, AvailableValsTy>::iterator LI = 291 SSAUpdateVals.find(OrigReg); 292 if (LI != SSAUpdateVals.end()) 293 LI->second.push_back(std::make_pair(BB, NewReg)); 294 else { 295 AvailableValsTy Vals; 296 Vals.push_back(std::make_pair(BB, NewReg)); 297 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); 298 SSAUpdateVRs.push_back(OrigReg); 299 } 300 } 301 302 /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the 303 /// source register that's contributed by PredBB and update SSA update map. 304 void TailDuplicator::processPHI( 305 MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 306 DenseMap<unsigned, unsigned> &LocalVRMap, 307 SmallVectorImpl<std::pair<unsigned, unsigned>> &Copies, 308 const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { 309 unsigned DefReg = MI->getOperand(0).getReg(); 310 unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); 311 assert(SrcOpIdx && "Unable to find matching PHI source?"); 312 unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); 313 const TargetRegisterClass *RC = MRI->getRegClass(DefReg); 314 LocalVRMap.insert(std::make_pair(DefReg, SrcReg)); 315 316 // Insert a copy from source to the end of the block. The def register is the 317 // available value liveout of the block. 318 unsigned NewDef = MRI->createVirtualRegister(RC); 319 Copies.push_back(std::make_pair(NewDef, SrcReg)); 320 if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) 321 addSSAUpdateEntry(DefReg, NewDef, PredBB); 322 323 if (!Remove) 324 return; 325 326 // Remove PredBB from the PHI node. 327 MI->RemoveOperand(SrcOpIdx + 1); 328 MI->RemoveOperand(SrcOpIdx); 329 if (MI->getNumOperands() == 1) 330 MI->eraseFromParent(); 331 } 332 333 /// Duplicate a TailBB instruction to PredBB and update 334 /// the source operands due to earlier PHI translation. 335 void TailDuplicator::duplicateInstruction( 336 MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 337 MachineFunction &MF, DenseMap<unsigned, unsigned> &LocalVRMap, 338 const DenseSet<unsigned> &UsedByPhi) { 339 MachineInstr *NewMI = TII->duplicate(MI, MF); 340 if (PreRegAlloc) { 341 for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { 342 MachineOperand &MO = NewMI->getOperand(i); 343 if (!MO.isReg()) 344 continue; 345 unsigned Reg = MO.getReg(); 346 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 347 continue; 348 if (MO.isDef()) { 349 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 350 unsigned NewReg = MRI->createVirtualRegister(RC); 351 MO.setReg(NewReg); 352 LocalVRMap.insert(std::make_pair(Reg, NewReg)); 353 if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) 354 addSSAUpdateEntry(Reg, NewReg, PredBB); 355 } else { 356 DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg); 357 if (VI != LocalVRMap.end()) { 358 MO.setReg(VI->second); 359 // Clear any kill flags from this operand. The new register could 360 // have uses after this one, so kills are not valid here. 361 MO.setIsKill(false); 362 MRI->constrainRegClass(VI->second, MRI->getRegClass(Reg)); 363 } 364 } 365 } 366 } 367 PredBB->insert(PredBB->instr_end(), NewMI); 368 } 369 370 /// After FromBB is tail duplicated into its predecessor blocks, the successors 371 /// have gained new predecessors. Update the PHI instructions in them 372 /// accordingly. 373 void TailDuplicator::updateSuccessorsPHIs( 374 MachineBasicBlock *FromBB, bool isDead, 375 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 376 SmallSetVector<MachineBasicBlock *, 8> &Succs) { 377 for (SmallSetVector<MachineBasicBlock *, 8>::iterator SI = Succs.begin(), 378 SE = Succs.end(); 379 SI != SE; ++SI) { 380 MachineBasicBlock *SuccBB = *SI; 381 for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); 382 II != EE; ++II) { 383 if (!II->isPHI()) 384 break; 385 MachineInstrBuilder MIB(*FromBB->getParent(), II); 386 unsigned Idx = 0; 387 for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { 388 MachineOperand &MO = II->getOperand(i + 1); 389 if (MO.getMBB() == FromBB) { 390 Idx = i; 391 break; 392 } 393 } 394 395 assert(Idx != 0); 396 MachineOperand &MO0 = II->getOperand(Idx); 397 unsigned Reg = MO0.getReg(); 398 if (isDead) { 399 // Folded into the previous BB. 400 // There could be duplicate phi source entries. FIXME: Should sdisel 401 // or earlier pass fixed this? 402 for (unsigned i = II->getNumOperands() - 2; i != Idx; i -= 2) { 403 MachineOperand &MO = II->getOperand(i + 1); 404 if (MO.getMBB() == FromBB) { 405 II->RemoveOperand(i + 1); 406 II->RemoveOperand(i); 407 } 408 } 409 } else 410 Idx = 0; 411 412 // If Idx is set, the operands at Idx and Idx+1 must be removed. 413 // We reuse the location to avoid expensive RemoveOperand calls. 414 415 DenseMap<unsigned, AvailableValsTy>::iterator LI = 416 SSAUpdateVals.find(Reg); 417 if (LI != SSAUpdateVals.end()) { 418 // This register is defined in the tail block. 419 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 420 MachineBasicBlock *SrcBB = LI->second[j].first; 421 // If we didn't duplicate a bb into a particular predecessor, we 422 // might still have added an entry to SSAUpdateVals to correcly 423 // recompute SSA. If that case, avoid adding a dummy extra argument 424 // this PHI. 425 if (!SrcBB->isSuccessor(SuccBB)) 426 continue; 427 428 unsigned SrcReg = LI->second[j].second; 429 if (Idx != 0) { 430 II->getOperand(Idx).setReg(SrcReg); 431 II->getOperand(Idx + 1).setMBB(SrcBB); 432 Idx = 0; 433 } else { 434 MIB.addReg(SrcReg).addMBB(SrcBB); 435 } 436 } 437 } else { 438 // Live in tail block, must also be live in predecessors. 439 for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { 440 MachineBasicBlock *SrcBB = TDBBs[j]; 441 if (Idx != 0) { 442 II->getOperand(Idx).setReg(Reg); 443 II->getOperand(Idx + 1).setMBB(SrcBB); 444 Idx = 0; 445 } else { 446 MIB.addReg(Reg).addMBB(SrcBB); 447 } 448 } 449 } 450 if (Idx != 0) { 451 II->RemoveOperand(Idx + 1); 452 II->RemoveOperand(Idx); 453 } 454 } 455 } 456 } 457 458 /// Determine if it is profitable to duplicate this block. 459 bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF, 460 bool IsSimple, 461 MachineBasicBlock &TailBB) { 462 // Only duplicate blocks that end with unconditional branches. 463 if (TailBB.canFallThrough()) 464 return false; 465 466 // Don't try to tail-duplicate single-block loops. 467 if (TailBB.isSuccessor(&TailBB)) 468 return false; 469 470 // Set the limit on the cost to duplicate. When optimizing for size, 471 // duplicate only one, because one branch instruction can be eliminated to 472 // compensate for the duplication. 473 unsigned MaxDuplicateCount; 474 if (TailDuplicateSize.getNumOccurrences() == 0 && 475 // FIXME: Use Function::optForSize(). 476 MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) 477 MaxDuplicateCount = 1; 478 else 479 MaxDuplicateCount = TailDuplicateSize; 480 481 // If the target has hardware branch prediction that can handle indirect 482 // branches, duplicating them can often make them predictable when there 483 // are common paths through the code. The limit needs to be high enough 484 // to allow undoing the effects of tail merging and other optimizations 485 // that rearrange the predecessors of the indirect branch. 486 487 bool HasIndirectbr = false; 488 if (!TailBB.empty()) 489 HasIndirectbr = TailBB.back().isIndirectBranch(); 490 491 if (HasIndirectbr && PreRegAlloc) 492 MaxDuplicateCount = 20; 493 494 // Check the instructions in the block to determine whether tail-duplication 495 // is invalid or unlikely to be profitable. 496 unsigned InstrCount = 0; 497 for (MachineInstr &MI : TailBB) { 498 // Non-duplicable things shouldn't be tail-duplicated. 499 if (MI.isNotDuplicable()) 500 return false; 501 502 // Convergent instructions can be duplicated only if doing so doesn't add 503 // new control dependencies, which is what we're going to do here. 504 if (MI.isConvergent()) 505 return false; 506 507 // Do not duplicate 'return' instructions if this is a pre-regalloc run. 508 // A return may expand into a lot more instructions (e.g. reload of callee 509 // saved registers) after PEI. 510 if (PreRegAlloc && MI.isReturn()) 511 return false; 512 513 // Avoid duplicating calls before register allocation. Calls presents a 514 // barrier to register allocation so duplicating them may end up increasing 515 // spills. 516 if (PreRegAlloc && MI.isCall()) 517 return false; 518 519 if (!MI.isPHI() && !MI.isDebugValue()) 520 InstrCount += 1; 521 522 if (InstrCount > MaxDuplicateCount) 523 return false; 524 } 525 526 // Check if any of the successors of TailBB has a PHI node in which the 527 // value corresponding to TailBB uses a subregister. 528 // If a phi node uses a register paired with a subregister, the actual 529 // "value type" of the phi may differ from the type of the register without 530 // any subregisters. Due to a bug, tail duplication may add a new operand 531 // without a necessary subregister, producing an invalid code. This is 532 // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. 533 // Disable tail duplication for this case for now, until the problem is 534 // fixed. 535 for (auto SB : TailBB.successors()) { 536 for (auto &I : *SB) { 537 if (!I.isPHI()) 538 break; 539 unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB); 540 assert(Idx != 0); 541 MachineOperand &PU = I.getOperand(Idx); 542 if (PU.getSubReg() != 0) 543 return false; 544 } 545 } 546 547 if (HasIndirectbr && PreRegAlloc) 548 return true; 549 550 if (IsSimple) 551 return true; 552 553 if (!PreRegAlloc) 554 return true; 555 556 return canCompletelyDuplicateBB(TailBB); 557 } 558 559 /// True if this BB has only one unconditional jump. 560 bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) { 561 if (TailBB->succ_size() != 1) 562 return false; 563 if (TailBB->pred_empty()) 564 return false; 565 MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(); 566 if (I == TailBB->end()) 567 return true; 568 return I->isUnconditionalBranch(); 569 } 570 571 static bool bothUsedInPHI(const MachineBasicBlock &A, 572 SmallPtrSet<MachineBasicBlock *, 8> SuccsB) { 573 for (MachineBasicBlock *BB : A.successors()) 574 if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) 575 return true; 576 577 return false; 578 } 579 580 bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { 581 for (MachineBasicBlock *PredBB : BB.predecessors()) { 582 if (PredBB->succ_size() > 1) 583 return false; 584 585 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 586 SmallVector<MachineOperand, 4> PredCond; 587 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 588 return false; 589 590 if (!PredCond.empty()) 591 return false; 592 } 593 return true; 594 } 595 596 bool TailDuplicator::duplicateSimpleBB( 597 MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs, 598 const DenseSet<unsigned> &UsedByPhi, 599 SmallVectorImpl<MachineInstr *> &Copies) { 600 SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(), 601 TailBB->succ_end()); 602 SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 603 TailBB->pred_end()); 604 bool Changed = false; 605 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 606 PE = Preds.end(); 607 PI != PE; ++PI) { 608 MachineBasicBlock *PredBB = *PI; 609 610 if (PredBB->hasEHPadSuccessor()) 611 continue; 612 613 if (bothUsedInPHI(*PredBB, Succs)) 614 continue; 615 616 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 617 SmallVector<MachineOperand, 4> PredCond; 618 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 619 continue; 620 621 Changed = true; 622 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 623 << "From simple Succ: " << *TailBB); 624 625 MachineBasicBlock *NewTarget = *TailBB->succ_begin(); 626 MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator()); 627 628 // Make PredFBB explicit. 629 if (PredCond.empty()) 630 PredFBB = PredTBB; 631 632 // Make fall through explicit. 633 if (!PredTBB) 634 PredTBB = NextBB; 635 if (!PredFBB) 636 PredFBB = NextBB; 637 638 // Redirect 639 if (PredFBB == TailBB) 640 PredFBB = NewTarget; 641 if (PredTBB == TailBB) 642 PredTBB = NewTarget; 643 644 // Make the branch unconditional if possible 645 if (PredTBB == PredFBB) { 646 PredCond.clear(); 647 PredFBB = nullptr; 648 } 649 650 // Avoid adding fall through branches. 651 if (PredFBB == NextBB) 652 PredFBB = nullptr; 653 if (PredTBB == NextBB && PredFBB == nullptr) 654 PredTBB = nullptr; 655 656 TII->RemoveBranch(*PredBB); 657 658 if (!PredBB->isSuccessor(NewTarget)) 659 PredBB->replaceSuccessor(TailBB, NewTarget); 660 else { 661 PredBB->removeSuccessor(TailBB, true); 662 assert(PredBB->succ_size() <= 1); 663 } 664 665 if (PredTBB) 666 TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); 667 668 TDBBs.push_back(PredBB); 669 } 670 return Changed; 671 } 672 673 /// If it is profitable, duplicate TailBB's contents in each 674 /// of its predecessors. 675 bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, 676 MachineBasicBlock *TailBB, 677 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 678 SmallVectorImpl<MachineInstr *> &Copies) { 679 DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); 680 681 DenseSet<unsigned> UsedByPhi; 682 getRegsUsedByPHIs(*TailBB, &UsedByPhi); 683 684 if (IsSimple) 685 return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); 686 687 // Iterate through all the unique predecessors and tail-duplicate this 688 // block into them, if possible. Copying the list ahead of time also 689 // avoids trouble with the predecessor list reallocating. 690 bool Changed = false; 691 SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 692 TailBB->pred_end()); 693 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 694 PE = Preds.end(); 695 PI != PE; ++PI) { 696 MachineBasicBlock *PredBB = *PI; 697 698 assert(TailBB != PredBB && 699 "Single-block loop should have been rejected earlier!"); 700 // EH edges are ignored by AnalyzeBranch. 701 if (PredBB->succ_size() > 1) 702 continue; 703 704 MachineBasicBlock *PredTBB, *PredFBB; 705 SmallVector<MachineOperand, 4> PredCond; 706 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 707 continue; 708 if (!PredCond.empty()) 709 continue; 710 // Don't duplicate into a fall-through predecessor (at least for now). 711 if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) 712 continue; 713 714 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 715 << "From Succ: " << *TailBB); 716 717 TDBBs.push_back(PredBB); 718 719 // Remove PredBB's unconditional branch. 720 TII->RemoveBranch(*PredBB); 721 722 if (RS && !TailBB->livein_empty()) { 723 // Update PredBB livein. 724 RS->enterBasicBlock(*PredBB); 725 if (!PredBB->empty()) 726 RS->forward(std::prev(PredBB->end())); 727 for (const auto &LI : TailBB->liveins()) { 728 if (!RS->isRegUsed(LI.PhysReg, false)) 729 // If a register is previously livein to the tail but it's not live 730 // at the end of predecessor BB, then it should be added to its 731 // livein list. 732 PredBB->addLiveIn(LI); 733 } 734 } 735 736 // Clone the contents of TailBB into PredBB. 737 DenseMap<unsigned, unsigned> LocalVRMap; 738 SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos; 739 // Use instr_iterator here to properly handle bundles, e.g. 740 // ARM Thumb2 IT block. 741 MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); 742 while (I != TailBB->instr_end()) { 743 MachineInstr *MI = &*I; 744 ++I; 745 if (MI->isPHI()) { 746 // Replace the uses of the def of the PHI with the register coming 747 // from PredBB. 748 processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); 749 } else { 750 // Replace def of virtual registers with new registers, and update 751 // uses with PHI source register or the new registers. 752 duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); 753 } 754 } 755 MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); 756 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 757 Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), 758 TII->get(TargetOpcode::COPY), CopyInfos[i].first) 759 .addReg(CopyInfos[i].second)); 760 } 761 762 // Simplify 763 TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); 764 765 NumInstrDups += TailBB->size() - 1; // subtract one for removed branch 766 767 // Update the CFG. 768 PredBB->removeSuccessor(PredBB->succ_begin()); 769 assert(PredBB->succ_empty() && 770 "TailDuplicate called on block with multiple successors!"); 771 for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), 772 E = TailBB->succ_end(); 773 I != E; ++I) 774 PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I)); 775 776 Changed = true; 777 ++NumTailDups; 778 } 779 780 // If TailBB was duplicated into all its predecessors except for the prior 781 // block, which falls through unconditionally, move the contents of this 782 // block into the prior block. 783 MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); 784 MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; 785 SmallVector<MachineOperand, 4> PriorCond; 786 // This has to check PrevBB->succ_size() because EH edges are ignored by 787 // AnalyzeBranch. 788 if (PrevBB->succ_size() == 1 && 789 !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && 790 PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && 791 !TailBB->hasAddressTaken()) { 792 DEBUG(dbgs() << "\nMerging into block: " << *PrevBB 793 << "From MBB: " << *TailBB); 794 if (PreRegAlloc) { 795 DenseMap<unsigned, unsigned> LocalVRMap; 796 SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos; 797 MachineBasicBlock::iterator I = TailBB->begin(); 798 // Process PHI instructions first. 799 while (I != TailBB->end() && I->isPHI()) { 800 // Replace the uses of the def of the PHI with the register coming 801 // from PredBB. 802 MachineInstr *MI = &*I++; 803 processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); 804 } 805 806 // Now copy the non-PHI instructions. 807 while (I != TailBB->end()) { 808 // Replace def of virtual registers with new registers, and update 809 // uses with PHI source register or the new registers. 810 MachineInstr *MI = &*I++; 811 assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); 812 duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); 813 MI->eraseFromParent(); 814 } 815 MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator(); 816 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 817 Copies.push_back(BuildMI(*PrevBB, Loc, DebugLoc(), 818 TII->get(TargetOpcode::COPY), 819 CopyInfos[i].first) 820 .addReg(CopyInfos[i].second)); 821 } 822 } else { 823 // No PHIs to worry about, just splice the instructions over. 824 PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); 825 } 826 PrevBB->removeSuccessor(PrevBB->succ_begin()); 827 assert(PrevBB->succ_empty()); 828 PrevBB->transferSuccessors(TailBB); 829 TDBBs.push_back(PrevBB); 830 Changed = true; 831 } 832 833 // If this is after register allocation, there are no phis to fix. 834 if (!PreRegAlloc) 835 return Changed; 836 837 // If we made no changes so far, we are safe. 838 if (!Changed) 839 return Changed; 840 841 // Handle the nasty case in that we duplicated a block that is part of a loop 842 // into some but not all of its predecessors. For example: 843 // 1 -> 2 <-> 3 | 844 // \ | 845 // \---> rest | 846 // if we duplicate 2 into 1 but not into 3, we end up with 847 // 12 -> 3 <-> 2 -> rest | 848 // \ / | 849 // \----->-----/ | 850 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced 851 // with a phi in 3 (which now dominates 2). 852 // What we do here is introduce a copy in 3 of the register defined by the 853 // phi, just like when we are duplicating 2 into 3, but we don't copy any 854 // real instructions or remove the 3 -> 2 edge from the phi in 2. 855 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 856 PE = Preds.end(); 857 PI != PE; ++PI) { 858 MachineBasicBlock *PredBB = *PI; 859 if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) 860 continue; 861 862 // EH edges 863 if (PredBB->succ_size() != 1) 864 continue; 865 866 DenseMap<unsigned, unsigned> LocalVRMap; 867 SmallVector<std::pair<unsigned, unsigned>, 4> CopyInfos; 868 MachineBasicBlock::iterator I = TailBB->begin(); 869 // Process PHI instructions first. 870 while (I != TailBB->end() && I->isPHI()) { 871 // Replace the uses of the def of the PHI with the register coming 872 // from PredBB. 873 MachineInstr *MI = &*I++; 874 processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); 875 } 876 MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator(); 877 for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) { 878 Copies.push_back(BuildMI(*PredBB, Loc, DebugLoc(), 879 TII->get(TargetOpcode::COPY), CopyInfos[i].first) 880 .addReg(CopyInfos[i].second)); 881 } 882 } 883 884 return Changed; 885 } 886 887 /// Remove the specified dead machine basic block from the function, updating 888 /// the CFG. 889 void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) { 890 assert(MBB->pred_empty() && "MBB must be dead!"); 891 DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); 892 893 // Remove all successors. 894 while (!MBB->succ_empty()) 895 MBB->removeSuccessor(MBB->succ_end() - 1); 896 897 // Remove the block. 898 MBB->eraseFromParent(); 899 } 900 901 } // End llvm namespace 902