1 //===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This utility class duplicates basic blocks ending in unconditional branches 11 // into the tails of their predecessors. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/TailDuplicator.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SetVector.h" 18 #include "llvm/ADT/SmallSet.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/Passes.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/ErrorHandling.h" 29 #include "llvm/Support/raw_ostream.h" 30 using namespace llvm; 31 32 #define DEBUG_TYPE "tailduplication" 33 34 STATISTIC(NumTails, "Number of tails duplicated"); 35 STATISTIC(NumTailDups, "Number of tail duplicated blocks"); 36 STATISTIC(NumTailDupAdded, 37 "Number of instructions added due to tail duplication"); 38 STATISTIC(NumTailDupRemoved, 39 "Number of instructions removed due to tail duplication"); 40 STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); 41 STATISTIC(NumAddedPHIs, "Number of phis added"); 42 43 namespace llvm { 44 45 // Heuristic for tail duplication. 46 static cl::opt<unsigned> TailDuplicateSize( 47 "tail-dup-size", 48 cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), 49 cl::Hidden); 50 51 cl::opt<unsigned> TailDupIndirectBranchSize( 52 "tail-dup-indirect-size", 53 cl::desc("Maximum instructions to consider tail duplicating blocks that " 54 "end with indirect branches."), cl::init(20), 55 cl::Hidden); 56 57 static cl::opt<bool> 58 TailDupVerify("tail-dup-verify", 59 cl::desc("Verify sanity of PHI instructions during taildup"), 60 cl::init(false), cl::Hidden); 61 62 static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), 63 cl::Hidden); 64 65 void TailDuplicator::initMF(MachineFunction &MFin, 66 const MachineBranchProbabilityInfo *MBPIin, 67 unsigned TailDupSizeIn) { 68 MF = &MFin; 69 TII = MF->getSubtarget().getInstrInfo(); 70 TRI = MF->getSubtarget().getRegisterInfo(); 71 MRI = &MF->getRegInfo(); 72 MMI = &MF->getMMI(); 73 MBPI = MBPIin; 74 TailDupSize = TailDupSizeIn; 75 76 assert(MBPI != nullptr && "Machine Branch Probability Info required"); 77 78 PreRegAlloc = MRI->isSSA(); 79 } 80 81 static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { 82 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { 83 MachineBasicBlock *MBB = &*I; 84 SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(), 85 MBB->pred_end()); 86 MachineBasicBlock::iterator MI = MBB->begin(); 87 while (MI != MBB->end()) { 88 if (!MI->isPHI()) 89 break; 90 for (MachineBasicBlock *PredBB : Preds) { 91 bool Found = false; 92 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 93 MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); 94 if (PHIBB == PredBB) { 95 Found = true; 96 break; 97 } 98 } 99 if (!Found) { 100 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 101 dbgs() << " missing input from predecessor BB#" 102 << PredBB->getNumber() << '\n'; 103 llvm_unreachable(nullptr); 104 } 105 } 106 107 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 108 MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); 109 if (CheckExtra && !Preds.count(PHIBB)) { 110 dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": " 111 << *MI; 112 dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() 113 << '\n'; 114 llvm_unreachable(nullptr); 115 } 116 if (PHIBB->getNumber() < 0) { 117 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 118 dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; 119 llvm_unreachable(nullptr); 120 } 121 } 122 ++MI; 123 } 124 } 125 } 126 127 /// Tail duplicate the block and cleanup. 128 /// \p IsSimple - return value of isSimpleBB 129 /// \p MBB - block to be duplicated 130 /// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of 131 /// all Preds that received a copy of \p MBB. 132 bool TailDuplicator::tailDuplicateAndUpdate( 133 bool IsSimple, MachineBasicBlock *MBB, 134 SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds) { 135 // Save the successors list. 136 SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(), 137 MBB->succ_end()); 138 139 SmallVector<MachineBasicBlock *, 8> TDBBs; 140 SmallVector<MachineInstr *, 16> Copies; 141 if (!tailDuplicate(IsSimple, MBB, TDBBs, Copies)) 142 return false; 143 144 ++NumTails; 145 146 SmallVector<MachineInstr *, 8> NewPHIs; 147 MachineSSAUpdater SSAUpdate(*MF, &NewPHIs); 148 149 // TailBB's immediate successors are now successors of those predecessors 150 // which duplicated TailBB. Add the predecessors as sources to the PHI 151 // instructions. 152 bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); 153 if (PreRegAlloc) 154 updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); 155 156 // If it is dead, remove it. 157 if (isDead) { 158 NumTailDupRemoved += MBB->size(); 159 removeDeadBlock(MBB); 160 ++NumDeadBlocks; 161 } 162 163 // Update SSA form. 164 if (!SSAUpdateVRs.empty()) { 165 for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { 166 unsigned VReg = SSAUpdateVRs[i]; 167 SSAUpdate.Initialize(VReg); 168 169 // If the original definition is still around, add it as an available 170 // value. 171 MachineInstr *DefMI = MRI->getVRegDef(VReg); 172 MachineBasicBlock *DefBB = nullptr; 173 if (DefMI) { 174 DefBB = DefMI->getParent(); 175 SSAUpdate.AddAvailableValue(DefBB, VReg); 176 } 177 178 // Add the new vregs as available values. 179 DenseMap<unsigned, AvailableValsTy>::iterator LI = 180 SSAUpdateVals.find(VReg); 181 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 182 MachineBasicBlock *SrcBB = LI->second[j].first; 183 unsigned SrcReg = LI->second[j].second; 184 SSAUpdate.AddAvailableValue(SrcBB, SrcReg); 185 } 186 187 // Rewrite uses that are outside of the original def's block. 188 MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); 189 while (UI != MRI->use_end()) { 190 MachineOperand &UseMO = *UI; 191 MachineInstr *UseMI = UseMO.getParent(); 192 ++UI; 193 if (UseMI->isDebugValue()) { 194 // SSAUpdate can replace the use with an undef. That creates 195 // a debug instruction that is a kill. 196 // FIXME: Should it SSAUpdate job to delete debug instructions 197 // instead of replacing the use with undef? 198 UseMI->eraseFromParent(); 199 continue; 200 } 201 if (UseMI->getParent() == DefBB && !UseMI->isPHI()) 202 continue; 203 SSAUpdate.RewriteUse(UseMO); 204 } 205 } 206 207 SSAUpdateVRs.clear(); 208 SSAUpdateVals.clear(); 209 } 210 211 // Eliminate some of the copies inserted by tail duplication to maintain 212 // SSA form. 213 for (unsigned i = 0, e = Copies.size(); i != e; ++i) { 214 MachineInstr *Copy = Copies[i]; 215 if (!Copy->isCopy()) 216 continue; 217 unsigned Dst = Copy->getOperand(0).getReg(); 218 unsigned Src = Copy->getOperand(1).getReg(); 219 if (MRI->hasOneNonDBGUse(Src) && 220 MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { 221 // Copy is the only use. Do trivial copy propagation here. 222 MRI->replaceRegWith(Dst, Src); 223 Copy->eraseFromParent(); 224 } 225 } 226 227 if (NewPHIs.size()) 228 NumAddedPHIs += NewPHIs.size(); 229 230 if (DuplicatedPreds) 231 *DuplicatedPreds = std::move(TDBBs); 232 233 return true; 234 } 235 236 /// Look for small blocks that are unconditionally branched to and do not fall 237 /// through. Tail-duplicate their instructions into their predecessors to 238 /// eliminate (dynamic) branches. 239 bool TailDuplicator::tailDuplicateBlocks() { 240 bool MadeChange = false; 241 242 if (PreRegAlloc && TailDupVerify) { 243 DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); 244 VerifyPHIs(*MF, true); 245 } 246 247 for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) { 248 MachineBasicBlock *MBB = &*I++; 249 250 if (NumTails == TailDupLimit) 251 break; 252 253 bool IsSimple = isSimpleBB(MBB); 254 255 if (!shouldTailDuplicate(IsSimple, *MBB)) 256 continue; 257 258 MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB); 259 } 260 261 if (PreRegAlloc && TailDupVerify) 262 VerifyPHIs(*MF, false); 263 264 return MadeChange; 265 } 266 267 static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, 268 const MachineRegisterInfo *MRI) { 269 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { 270 if (UseMI.isDebugValue()) 271 continue; 272 if (UseMI.getParent() != BB) 273 return true; 274 } 275 return false; 276 } 277 278 static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { 279 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) 280 if (MI->getOperand(i + 1).getMBB() == SrcBB) 281 return i; 282 return 0; 283 } 284 285 // Remember which registers are used by phis in this block. This is 286 // used to determine which registers are liveout while modifying the 287 // block (which is why we need to copy the information). 288 static void getRegsUsedByPHIs(const MachineBasicBlock &BB, 289 DenseSet<unsigned> *UsedByPhi) { 290 for (const auto &MI : BB) { 291 if (!MI.isPHI()) 292 break; 293 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 294 unsigned SrcReg = MI.getOperand(i).getReg(); 295 UsedByPhi->insert(SrcReg); 296 } 297 } 298 } 299 300 /// Add a definition and source virtual registers pair for SSA update. 301 void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 302 MachineBasicBlock *BB) { 303 DenseMap<unsigned, AvailableValsTy>::iterator LI = 304 SSAUpdateVals.find(OrigReg); 305 if (LI != SSAUpdateVals.end()) 306 LI->second.push_back(std::make_pair(BB, NewReg)); 307 else { 308 AvailableValsTy Vals; 309 Vals.push_back(std::make_pair(BB, NewReg)); 310 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); 311 SSAUpdateVRs.push_back(OrigReg); 312 } 313 } 314 315 /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the 316 /// source register that's contributed by PredBB and update SSA update map. 317 void TailDuplicator::processPHI( 318 MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 319 DenseMap<unsigned, RegSubRegPair> &LocalVRMap, 320 SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies, 321 const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { 322 unsigned DefReg = MI->getOperand(0).getReg(); 323 unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); 324 assert(SrcOpIdx && "Unable to find matching PHI source?"); 325 unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); 326 unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg(); 327 const TargetRegisterClass *RC = MRI->getRegClass(DefReg); 328 LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg))); 329 330 // Insert a copy from source to the end of the block. The def register is the 331 // available value liveout of the block. 332 unsigned NewDef = MRI->createVirtualRegister(RC); 333 Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg))); 334 if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) 335 addSSAUpdateEntry(DefReg, NewDef, PredBB); 336 337 if (!Remove) 338 return; 339 340 // Remove PredBB from the PHI node. 341 MI->RemoveOperand(SrcOpIdx + 1); 342 MI->RemoveOperand(SrcOpIdx); 343 if (MI->getNumOperands() == 1) 344 MI->eraseFromParent(); 345 } 346 347 /// Duplicate a TailBB instruction to PredBB and update 348 /// the source operands due to earlier PHI translation. 349 void TailDuplicator::duplicateInstruction( 350 MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 351 DenseMap<unsigned, RegSubRegPair> &LocalVRMap, 352 const DenseSet<unsigned> &UsedByPhi) { 353 MachineInstr *NewMI = TII->duplicate(*MI, *MF); 354 if (PreRegAlloc) { 355 for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { 356 MachineOperand &MO = NewMI->getOperand(i); 357 if (!MO.isReg()) 358 continue; 359 unsigned Reg = MO.getReg(); 360 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 361 continue; 362 if (MO.isDef()) { 363 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 364 unsigned NewReg = MRI->createVirtualRegister(RC); 365 MO.setReg(NewReg); 366 LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); 367 if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) 368 addSSAUpdateEntry(Reg, NewReg, PredBB); 369 } else { 370 auto VI = LocalVRMap.find(Reg); 371 if (VI != LocalVRMap.end()) { 372 // Need to make sure that the register class of the mapped register 373 // will satisfy the constraints of the class of the register being 374 // replaced. 375 auto *OrigRC = MRI->getRegClass(Reg); 376 auto *MappedRC = MRI->getRegClass(VI->second.Reg); 377 const TargetRegisterClass *ConstrRC; 378 if (VI->second.SubReg != 0) { 379 ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC, 380 VI->second.SubReg); 381 if (ConstrRC) { 382 // The actual constraining (as in "find appropriate new class") 383 // is done by getMatchingSuperRegClass, so now we only need to 384 // change the class of the mapped register. 385 MRI->setRegClass(VI->second.Reg, ConstrRC); 386 } 387 } else { 388 // For mapped registers that do not have sub-registers, simply 389 // restrict their class to match the original one. 390 ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC); 391 } 392 393 if (ConstrRC) { 394 // If the class constraining succeeded, we can simply replace 395 // the old register with the mapped one. 396 MO.setReg(VI->second.Reg); 397 // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a 398 // sub-register, we need to compose the sub-register indices. 399 MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(), 400 VI->second.SubReg)); 401 } else { 402 // The direct replacement is not possible, due to failing register 403 // class constraints. An explicit COPY is necessary. Create one 404 // that can be reused 405 auto *NewRC = MI->getRegClassConstraint(i, TII, TRI); 406 if (NewRC == nullptr) 407 NewRC = OrigRC; 408 unsigned NewReg = MRI->createVirtualRegister(NewRC); 409 BuildMI(*PredBB, MI, MI->getDebugLoc(), 410 TII->get(TargetOpcode::COPY), NewReg) 411 .addReg(VI->second.Reg, 0, VI->second.SubReg); 412 LocalVRMap.erase(VI); 413 LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); 414 MO.setReg(NewReg); 415 // The composed VI.Reg:VI.SubReg is replaced with NewReg, which 416 // is equivalent to the whole register Reg. Hence, Reg:subreg 417 // is same as NewReg:subreg, so keep the sub-register index 418 // unchanged. 419 } 420 // Clear any kill flags from this operand. The new register could 421 // have uses after this one, so kills are not valid here. 422 MO.setIsKill(false); 423 } 424 } 425 } 426 } 427 PredBB->insert(PredBB->instr_end(), NewMI); 428 } 429 430 /// After FromBB is tail duplicated into its predecessor blocks, the successors 431 /// have gained new predecessors. Update the PHI instructions in them 432 /// accordingly. 433 void TailDuplicator::updateSuccessorsPHIs( 434 MachineBasicBlock *FromBB, bool isDead, 435 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 436 SmallSetVector<MachineBasicBlock *, 8> &Succs) { 437 for (MachineBasicBlock *SuccBB : Succs) { 438 for (MachineInstr &MI : *SuccBB) { 439 if (!MI.isPHI()) 440 break; 441 MachineInstrBuilder MIB(*FromBB->getParent(), MI); 442 unsigned Idx = 0; 443 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 444 MachineOperand &MO = MI.getOperand(i + 1); 445 if (MO.getMBB() == FromBB) { 446 Idx = i; 447 break; 448 } 449 } 450 451 assert(Idx != 0); 452 MachineOperand &MO0 = MI.getOperand(Idx); 453 unsigned Reg = MO0.getReg(); 454 if (isDead) { 455 // Folded into the previous BB. 456 // There could be duplicate phi source entries. FIXME: Should sdisel 457 // or earlier pass fixed this? 458 for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) { 459 MachineOperand &MO = MI.getOperand(i + 1); 460 if (MO.getMBB() == FromBB) { 461 MI.RemoveOperand(i + 1); 462 MI.RemoveOperand(i); 463 } 464 } 465 } else 466 Idx = 0; 467 468 // If Idx is set, the operands at Idx and Idx+1 must be removed. 469 // We reuse the location to avoid expensive RemoveOperand calls. 470 471 DenseMap<unsigned, AvailableValsTy>::iterator LI = 472 SSAUpdateVals.find(Reg); 473 if (LI != SSAUpdateVals.end()) { 474 // This register is defined in the tail block. 475 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 476 MachineBasicBlock *SrcBB = LI->second[j].first; 477 // If we didn't duplicate a bb into a particular predecessor, we 478 // might still have added an entry to SSAUpdateVals to correcly 479 // recompute SSA. If that case, avoid adding a dummy extra argument 480 // this PHI. 481 if (!SrcBB->isSuccessor(SuccBB)) 482 continue; 483 484 unsigned SrcReg = LI->second[j].second; 485 if (Idx != 0) { 486 MI.getOperand(Idx).setReg(SrcReg); 487 MI.getOperand(Idx + 1).setMBB(SrcBB); 488 Idx = 0; 489 } else { 490 MIB.addReg(SrcReg).addMBB(SrcBB); 491 } 492 } 493 } else { 494 // Live in tail block, must also be live in predecessors. 495 for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { 496 MachineBasicBlock *SrcBB = TDBBs[j]; 497 if (Idx != 0) { 498 MI.getOperand(Idx).setReg(Reg); 499 MI.getOperand(Idx + 1).setMBB(SrcBB); 500 Idx = 0; 501 } else { 502 MIB.addReg(Reg).addMBB(SrcBB); 503 } 504 } 505 } 506 if (Idx != 0) { 507 MI.RemoveOperand(Idx + 1); 508 MI.RemoveOperand(Idx); 509 } 510 } 511 } 512 } 513 514 /// Determine if it is profitable to duplicate this block. 515 bool TailDuplicator::shouldTailDuplicate(bool IsSimple, 516 MachineBasicBlock &TailBB) { 517 // Only duplicate blocks that end with unconditional branches. 518 if (TailBB.canFallThrough()) 519 return false; 520 521 // Don't try to tail-duplicate single-block loops. 522 if (TailBB.isSuccessor(&TailBB)) 523 return false; 524 525 // Set the limit on the cost to duplicate. When optimizing for size, 526 // duplicate only one, because one branch instruction can be eliminated to 527 // compensate for the duplication. 528 unsigned MaxDuplicateCount; 529 if (TailDupSize == 0 && 530 TailDuplicateSize.getNumOccurrences() == 0 && 531 MF->getFunction()->optForSize()) 532 MaxDuplicateCount = 1; 533 else if (TailDupSize == 0) 534 MaxDuplicateCount = TailDuplicateSize; 535 else 536 MaxDuplicateCount = TailDupSize; 537 538 // If the block to be duplicated ends in an unanalyzable fallthrough, don't 539 // duplicate it. 540 // A similar check is necessary in MachineBlockPlacement to make sure pairs of 541 // blocks with unanalyzable fallthrough get layed out contiguously. 542 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 543 SmallVector<MachineOperand, 4> PredCond; 544 if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond, true) 545 && TailBB.canFallThrough()) 546 return false; 547 548 // If the target has hardware branch prediction that can handle indirect 549 // branches, duplicating them can often make them predictable when there 550 // are common paths through the code. The limit needs to be high enough 551 // to allow undoing the effects of tail merging and other optimizations 552 // that rearrange the predecessors of the indirect branch. 553 554 bool HasIndirectbr = false; 555 if (!TailBB.empty()) 556 HasIndirectbr = TailBB.back().isIndirectBranch(); 557 558 if (HasIndirectbr && PreRegAlloc) 559 MaxDuplicateCount = TailDupIndirectBranchSize; 560 561 // Check the instructions in the block to determine whether tail-duplication 562 // is invalid or unlikely to be profitable. 563 unsigned InstrCount = 0; 564 for (MachineInstr &MI : TailBB) { 565 // Non-duplicable things shouldn't be tail-duplicated. 566 if (MI.isNotDuplicable()) 567 return false; 568 569 // Convergent instructions can be duplicated only if doing so doesn't add 570 // new control dependencies, which is what we're going to do here. 571 if (MI.isConvergent()) 572 return false; 573 574 // Do not duplicate 'return' instructions if this is a pre-regalloc run. 575 // A return may expand into a lot more instructions (e.g. reload of callee 576 // saved registers) after PEI. 577 if (PreRegAlloc && MI.isReturn()) 578 return false; 579 580 // Avoid duplicating calls before register allocation. Calls presents a 581 // barrier to register allocation so duplicating them may end up increasing 582 // spills. 583 if (PreRegAlloc && MI.isCall()) 584 return false; 585 586 if (!MI.isPHI() && !MI.isDebugValue()) 587 InstrCount += 1; 588 589 if (InstrCount > MaxDuplicateCount) 590 return false; 591 } 592 593 // Check if any of the successors of TailBB has a PHI node in which the 594 // value corresponding to TailBB uses a subregister. 595 // If a phi node uses a register paired with a subregister, the actual 596 // "value type" of the phi may differ from the type of the register without 597 // any subregisters. Due to a bug, tail duplication may add a new operand 598 // without a necessary subregister, producing an invalid code. This is 599 // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. 600 // Disable tail duplication for this case for now, until the problem is 601 // fixed. 602 for (auto SB : TailBB.successors()) { 603 for (auto &I : *SB) { 604 if (!I.isPHI()) 605 break; 606 unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB); 607 assert(Idx != 0); 608 MachineOperand &PU = I.getOperand(Idx); 609 if (PU.getSubReg() != 0) 610 return false; 611 } 612 } 613 614 if (HasIndirectbr && PreRegAlloc) 615 return true; 616 617 if (IsSimple) 618 return true; 619 620 if (!PreRegAlloc) 621 return true; 622 623 return canCompletelyDuplicateBB(TailBB); 624 } 625 626 /// True if this BB has only one unconditional jump. 627 bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) { 628 if (TailBB->succ_size() != 1) 629 return false; 630 if (TailBB->pred_empty()) 631 return false; 632 MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(); 633 if (I == TailBB->end()) 634 return true; 635 return I->isUnconditionalBranch(); 636 } 637 638 static bool bothUsedInPHI(const MachineBasicBlock &A, 639 const SmallPtrSet<MachineBasicBlock *, 8> &SuccsB) { 640 for (MachineBasicBlock *BB : A.successors()) 641 if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) 642 return true; 643 644 return false; 645 } 646 647 bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { 648 for (MachineBasicBlock *PredBB : BB.predecessors()) { 649 if (PredBB->succ_size() > 1) 650 return false; 651 652 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 653 SmallVector<MachineOperand, 4> PredCond; 654 if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 655 return false; 656 657 if (!PredCond.empty()) 658 return false; 659 } 660 return true; 661 } 662 663 bool TailDuplicator::duplicateSimpleBB( 664 MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs, 665 const DenseSet<unsigned> &UsedByPhi, 666 SmallVectorImpl<MachineInstr *> &Copies) { 667 SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(), 668 TailBB->succ_end()); 669 SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 670 TailBB->pred_end()); 671 bool Changed = false; 672 for (MachineBasicBlock *PredBB : Preds) { 673 if (PredBB->hasEHPadSuccessor()) 674 continue; 675 676 if (bothUsedInPHI(*PredBB, Succs)) 677 continue; 678 679 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 680 SmallVector<MachineOperand, 4> PredCond; 681 if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 682 continue; 683 684 Changed = true; 685 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 686 << "From simple Succ: " << *TailBB); 687 688 MachineBasicBlock *NewTarget = *TailBB->succ_begin(); 689 MachineBasicBlock *NextBB = PredBB->getNextNode(); 690 691 // Make PredFBB explicit. 692 if (PredCond.empty()) 693 PredFBB = PredTBB; 694 695 // Make fall through explicit. 696 if (!PredTBB) 697 PredTBB = NextBB; 698 if (!PredFBB) 699 PredFBB = NextBB; 700 701 // Redirect 702 if (PredFBB == TailBB) 703 PredFBB = NewTarget; 704 if (PredTBB == TailBB) 705 PredTBB = NewTarget; 706 707 // Make the branch unconditional if possible 708 if (PredTBB == PredFBB) { 709 PredCond.clear(); 710 PredFBB = nullptr; 711 } 712 713 // Avoid adding fall through branches. 714 if (PredFBB == NextBB) 715 PredFBB = nullptr; 716 if (PredTBB == NextBB && PredFBB == nullptr) 717 PredTBB = nullptr; 718 719 TII->removeBranch(*PredBB); 720 721 if (!PredBB->isSuccessor(NewTarget)) 722 PredBB->replaceSuccessor(TailBB, NewTarget); 723 else { 724 PredBB->removeSuccessor(TailBB, true); 725 assert(PredBB->succ_size() <= 1); 726 } 727 728 if (PredTBB) 729 TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); 730 731 TDBBs.push_back(PredBB); 732 } 733 return Changed; 734 } 735 736 bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, 737 MachineBasicBlock *PredBB) { 738 // EH edges are ignored by AnalyzeBranch. 739 if (PredBB->succ_size() > 1) 740 return false; 741 742 MachineBasicBlock *PredTBB, *PredFBB; 743 SmallVector<MachineOperand, 4> PredCond; 744 if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 745 return false; 746 if (!PredCond.empty()) 747 return false; 748 return true; 749 } 750 751 /// If it is profitable, duplicate TailBB's contents in each 752 /// of its predecessors. 753 bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, 754 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 755 SmallVectorImpl<MachineInstr *> &Copies) { 756 DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); 757 758 DenseSet<unsigned> UsedByPhi; 759 getRegsUsedByPHIs(*TailBB, &UsedByPhi); 760 761 if (IsSimple) 762 return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); 763 764 // Iterate through all the unique predecessors and tail-duplicate this 765 // block into them, if possible. Copying the list ahead of time also 766 // avoids trouble with the predecessor list reallocating. 767 bool Changed = false; 768 SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 769 TailBB->pred_end()); 770 for (MachineBasicBlock *PredBB : Preds) { 771 assert(TailBB != PredBB && 772 "Single-block loop should have been rejected earlier!"); 773 774 if (!canTailDuplicate(TailBB, PredBB)) 775 continue; 776 777 // Don't duplicate into a fall-through predecessor (at least for now). 778 if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) 779 continue; 780 781 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 782 << "From Succ: " << *TailBB); 783 784 TDBBs.push_back(PredBB); 785 786 // Remove PredBB's unconditional branch. 787 TII->removeBranch(*PredBB); 788 789 // Clone the contents of TailBB into PredBB. 790 DenseMap<unsigned, RegSubRegPair> LocalVRMap; 791 SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 792 // Use instr_iterator here to properly handle bundles, e.g. 793 // ARM Thumb2 IT block. 794 MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); 795 while (I != TailBB->instr_end()) { 796 MachineInstr *MI = &*I; 797 ++I; 798 if (MI->isPHI()) { 799 // Replace the uses of the def of the PHI with the register coming 800 // from PredBB. 801 processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); 802 } else { 803 // Replace def of virtual registers with new registers, and update 804 // uses with PHI source register or the new registers. 805 duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi); 806 } 807 } 808 appendCopies(PredBB, CopyInfos, Copies); 809 810 // Simplify 811 MachineBasicBlock *PredTBB, *PredFBB; 812 SmallVector<MachineOperand, 4> PredCond; 813 TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); 814 815 NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch 816 817 // Update the CFG. 818 PredBB->removeSuccessor(PredBB->succ_begin()); 819 assert(PredBB->succ_empty() && 820 "TailDuplicate called on block with multiple successors!"); 821 for (MachineBasicBlock *Succ : TailBB->successors()) 822 PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ)); 823 824 Changed = true; 825 ++NumTailDups; 826 } 827 828 // If TailBB was duplicated into all its predecessors except for the prior 829 // block, which falls through unconditionally, move the contents of this 830 // block into the prior block. 831 MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); 832 MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; 833 SmallVector<MachineOperand, 4> PriorCond; 834 // This has to check PrevBB->succ_size() because EH edges are ignored by 835 // AnalyzeBranch. 836 if (PrevBB->succ_size() == 1 && 837 // Layout preds are not always CFG preds. Check. 838 *PrevBB->succ_begin() == TailBB && 839 !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && 840 PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && 841 !TailBB->hasAddressTaken()) { 842 DEBUG(dbgs() << "\nMerging into block: " << *PrevBB 843 << "From MBB: " << *TailBB); 844 if (PreRegAlloc) { 845 DenseMap<unsigned, RegSubRegPair> LocalVRMap; 846 SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 847 MachineBasicBlock::iterator I = TailBB->begin(); 848 // Process PHI instructions first. 849 while (I != TailBB->end() && I->isPHI()) { 850 // Replace the uses of the def of the PHI with the register coming 851 // from PredBB. 852 MachineInstr *MI = &*I++; 853 processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); 854 } 855 856 // Now copy the non-PHI instructions. 857 while (I != TailBB->end()) { 858 // Replace def of virtual registers with new registers, and update 859 // uses with PHI source register or the new registers. 860 MachineInstr *MI = &*I++; 861 assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); 862 duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); 863 MI->eraseFromParent(); 864 } 865 appendCopies(PrevBB, CopyInfos, Copies); 866 } else { 867 // No PHIs to worry about, just splice the instructions over. 868 PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); 869 } 870 PrevBB->removeSuccessor(PrevBB->succ_begin()); 871 assert(PrevBB->succ_empty()); 872 PrevBB->transferSuccessors(TailBB); 873 TDBBs.push_back(PrevBB); 874 Changed = true; 875 } 876 877 // If this is after register allocation, there are no phis to fix. 878 if (!PreRegAlloc) 879 return Changed; 880 881 // If we made no changes so far, we are safe. 882 if (!Changed) 883 return Changed; 884 885 // Handle the nasty case in that we duplicated a block that is part of a loop 886 // into some but not all of its predecessors. For example: 887 // 1 -> 2 <-> 3 | 888 // \ | 889 // \---> rest | 890 // if we duplicate 2 into 1 but not into 3, we end up with 891 // 12 -> 3 <-> 2 -> rest | 892 // \ / | 893 // \----->-----/ | 894 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced 895 // with a phi in 3 (which now dominates 2). 896 // What we do here is introduce a copy in 3 of the register defined by the 897 // phi, just like when we are duplicating 2 into 3, but we don't copy any 898 // real instructions or remove the 3 -> 2 edge from the phi in 2. 899 for (MachineBasicBlock *PredBB : Preds) { 900 if (is_contained(TDBBs, PredBB)) 901 continue; 902 903 // EH edges 904 if (PredBB->succ_size() != 1) 905 continue; 906 907 DenseMap<unsigned, RegSubRegPair> LocalVRMap; 908 SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 909 MachineBasicBlock::iterator I = TailBB->begin(); 910 // Process PHI instructions first. 911 while (I != TailBB->end() && I->isPHI()) { 912 // Replace the uses of the def of the PHI with the register coming 913 // from PredBB. 914 MachineInstr *MI = &*I++; 915 processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); 916 } 917 appendCopies(PredBB, CopyInfos, Copies); 918 } 919 920 return Changed; 921 } 922 923 /// At the end of the block \p MBB generate COPY instructions between registers 924 /// described by \p CopyInfos. Append resulting instructions to \p Copies. 925 void TailDuplicator::appendCopies(MachineBasicBlock *MBB, 926 SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos, 927 SmallVectorImpl<MachineInstr*> &Copies) { 928 MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); 929 const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY); 930 for (auto &CI : CopyInfos) { 931 auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first) 932 .addReg(CI.second.Reg, 0, CI.second.SubReg); 933 Copies.push_back(C); 934 } 935 } 936 937 /// Remove the specified dead machine basic block from the function, updating 938 /// the CFG. 939 void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) { 940 assert(MBB->pred_empty() && "MBB must be dead!"); 941 DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); 942 943 // Remove all successors. 944 while (!MBB->succ_empty()) 945 MBB->removeSuccessor(MBB->succ_end() - 1); 946 947 // Remove the block. 948 MBB->eraseFromParent(); 949 } 950 951 } // End llvm namespace 952