1 //===-- TailDuplicator.cpp - Duplicate blocks into predecessors' tails ---===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This utility class duplicates basic blocks ending in unconditional branches 11 // into the tails of their predecessors. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/CodeGen/TailDuplicator.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SetVector.h" 18 #include "llvm/ADT/SmallSet.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 21 #include "llvm/CodeGen/MachineFunctionPass.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/Passes.h" 25 #include "llvm/IR/Function.h" 26 #include "llvm/Support/CommandLine.h" 27 #include "llvm/Support/Debug.h" 28 #include "llvm/Support/ErrorHandling.h" 29 #include "llvm/Support/raw_ostream.h" 30 using namespace llvm; 31 32 #define DEBUG_TYPE "tailduplication" 33 34 STATISTIC(NumTails, "Number of tails duplicated"); 35 STATISTIC(NumTailDups, "Number of tail duplicated blocks"); 36 STATISTIC(NumInstrDups, "Additional instructions due to tail duplication"); 37 STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); 38 STATISTIC(NumAddedPHIs, "Number of phis added"); 39 40 // Heuristic for tail duplication. 41 static cl::opt<unsigned> TailDuplicateSize( 42 "tail-dup-size", 43 cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), 44 cl::Hidden); 45 46 static cl::opt<bool> 47 TailDupVerify("tail-dup-verify", 48 cl::desc("Verify sanity of PHI instructions during taildup"), 49 cl::init(false), cl::Hidden); 50 51 static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), 52 cl::Hidden); 53 54 namespace llvm { 55 56 void TailDuplicator::initMF(MachineFunction &MF, const MachineModuleInfo *MMIin, 57 const MachineBranchProbabilityInfo *MBPIin) { 58 TII = MF.getSubtarget().getInstrInfo(); 59 TRI = MF.getSubtarget().getRegisterInfo(); 60 MRI = &MF.getRegInfo(); 61 MMI = MMIin; 62 MBPI = MBPIin; 63 64 assert(MBPI != nullptr && "Machine Branch Probability Info required"); 65 66 PreRegAlloc = MRI->isSSA(); 67 RS.reset(); 68 69 if (MRI->tracksLiveness() && TRI->trackLivenessAfterRegAlloc(MF)) 70 RS.reset(new RegScavenger()); 71 } 72 73 static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { 74 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { 75 MachineBasicBlock *MBB = &*I; 76 SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(), 77 MBB->pred_end()); 78 MachineBasicBlock::iterator MI = MBB->begin(); 79 while (MI != MBB->end()) { 80 if (!MI->isPHI()) 81 break; 82 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 83 PE = Preds.end(); 84 PI != PE; ++PI) { 85 MachineBasicBlock *PredBB = *PI; 86 bool Found = false; 87 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 88 MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); 89 if (PHIBB == PredBB) { 90 Found = true; 91 break; 92 } 93 } 94 if (!Found) { 95 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 96 dbgs() << " missing input from predecessor BB#" 97 << PredBB->getNumber() << '\n'; 98 llvm_unreachable(nullptr); 99 } 100 } 101 102 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 103 MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); 104 if (CheckExtra && !Preds.count(PHIBB)) { 105 dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": " 106 << *MI; 107 dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() 108 << '\n'; 109 llvm_unreachable(nullptr); 110 } 111 if (PHIBB->getNumber() < 0) { 112 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 113 dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; 114 llvm_unreachable(nullptr); 115 } 116 } 117 ++MI; 118 } 119 } 120 } 121 122 /// Tail duplicate the block and cleanup. 123 bool TailDuplicator::tailDuplicateAndUpdate(MachineFunction &MF, bool IsSimple, 124 MachineBasicBlock *MBB) { 125 // Save the successors list. 126 SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(), 127 MBB->succ_end()); 128 129 SmallVector<MachineBasicBlock *, 8> TDBBs; 130 SmallVector<MachineInstr *, 16> Copies; 131 if (!tailDuplicate(MF, IsSimple, MBB, TDBBs, Copies)) 132 return false; 133 134 ++NumTails; 135 136 SmallVector<MachineInstr *, 8> NewPHIs; 137 MachineSSAUpdater SSAUpdate(MF, &NewPHIs); 138 139 // TailBB's immediate successors are now successors of those predecessors 140 // which duplicated TailBB. Add the predecessors as sources to the PHI 141 // instructions. 142 bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); 143 if (PreRegAlloc) 144 updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); 145 146 // If it is dead, remove it. 147 if (isDead) { 148 NumInstrDups -= MBB->size(); 149 removeDeadBlock(MBB); 150 ++NumDeadBlocks; 151 } 152 153 // Update SSA form. 154 if (!SSAUpdateVRs.empty()) { 155 for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { 156 unsigned VReg = SSAUpdateVRs[i]; 157 SSAUpdate.Initialize(VReg); 158 159 // If the original definition is still around, add it as an available 160 // value. 161 MachineInstr *DefMI = MRI->getVRegDef(VReg); 162 MachineBasicBlock *DefBB = nullptr; 163 if (DefMI) { 164 DefBB = DefMI->getParent(); 165 SSAUpdate.AddAvailableValue(DefBB, VReg); 166 } 167 168 // Add the new vregs as available values. 169 DenseMap<unsigned, AvailableValsTy>::iterator LI = 170 SSAUpdateVals.find(VReg); 171 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 172 MachineBasicBlock *SrcBB = LI->second[j].first; 173 unsigned SrcReg = LI->second[j].second; 174 SSAUpdate.AddAvailableValue(SrcBB, SrcReg); 175 } 176 177 // Rewrite uses that are outside of the original def's block. 178 MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); 179 while (UI != MRI->use_end()) { 180 MachineOperand &UseMO = *UI; 181 MachineInstr *UseMI = UseMO.getParent(); 182 ++UI; 183 if (UseMI->isDebugValue()) { 184 // SSAUpdate can replace the use with an undef. That creates 185 // a debug instruction that is a kill. 186 // FIXME: Should it SSAUpdate job to delete debug instructions 187 // instead of replacing the use with undef? 188 UseMI->eraseFromParent(); 189 continue; 190 } 191 if (UseMI->getParent() == DefBB && !UseMI->isPHI()) 192 continue; 193 SSAUpdate.RewriteUse(UseMO); 194 } 195 } 196 197 SSAUpdateVRs.clear(); 198 SSAUpdateVals.clear(); 199 } 200 201 // Eliminate some of the copies inserted by tail duplication to maintain 202 // SSA form. 203 for (unsigned i = 0, e = Copies.size(); i != e; ++i) { 204 MachineInstr *Copy = Copies[i]; 205 if (!Copy->isCopy()) 206 continue; 207 unsigned Dst = Copy->getOperand(0).getReg(); 208 unsigned Src = Copy->getOperand(1).getReg(); 209 if (MRI->hasOneNonDBGUse(Src) && 210 MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { 211 // Copy is the only use. Do trivial copy propagation here. 212 MRI->replaceRegWith(Dst, Src); 213 Copy->eraseFromParent(); 214 } 215 } 216 217 if (NewPHIs.size()) 218 NumAddedPHIs += NewPHIs.size(); 219 220 return true; 221 } 222 223 /// Look for small blocks that are unconditionally branched to and do not fall 224 /// through. Tail-duplicate their instructions into their predecessors to 225 /// eliminate (dynamic) branches. 226 bool TailDuplicator::tailDuplicateBlocks(MachineFunction &MF) { 227 bool MadeChange = false; 228 229 if (PreRegAlloc && TailDupVerify) { 230 DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); 231 VerifyPHIs(MF, true); 232 } 233 234 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E;) { 235 MachineBasicBlock *MBB = &*I++; 236 237 if (NumTails == TailDupLimit) 238 break; 239 240 bool IsSimple = isSimpleBB(MBB); 241 242 if (!shouldTailDuplicate(MF, IsSimple, *MBB)) 243 continue; 244 245 MadeChange |= tailDuplicateAndUpdate(MF, IsSimple, MBB); 246 } 247 248 if (PreRegAlloc && TailDupVerify) 249 VerifyPHIs(MF, false); 250 251 return MadeChange; 252 } 253 254 static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, 255 const MachineRegisterInfo *MRI) { 256 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { 257 if (UseMI.isDebugValue()) 258 continue; 259 if (UseMI.getParent() != BB) 260 return true; 261 } 262 return false; 263 } 264 265 static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { 266 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) 267 if (MI->getOperand(i + 1).getMBB() == SrcBB) 268 return i; 269 return 0; 270 } 271 272 // Remember which registers are used by phis in this block. This is 273 // used to determine which registers are liveout while modifying the 274 // block (which is why we need to copy the information). 275 static void getRegsUsedByPHIs(const MachineBasicBlock &BB, 276 DenseSet<unsigned> *UsedByPhi) { 277 for (const auto &MI : BB) { 278 if (!MI.isPHI()) 279 break; 280 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 281 unsigned SrcReg = MI.getOperand(i).getReg(); 282 UsedByPhi->insert(SrcReg); 283 } 284 } 285 } 286 287 /// Add a definition and source virtual registers pair for SSA update. 288 void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 289 MachineBasicBlock *BB) { 290 DenseMap<unsigned, AvailableValsTy>::iterator LI = 291 SSAUpdateVals.find(OrigReg); 292 if (LI != SSAUpdateVals.end()) 293 LI->second.push_back(std::make_pair(BB, NewReg)); 294 else { 295 AvailableValsTy Vals; 296 Vals.push_back(std::make_pair(BB, NewReg)); 297 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); 298 SSAUpdateVRs.push_back(OrigReg); 299 } 300 } 301 302 /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the 303 /// source register that's contributed by PredBB and update SSA update map. 304 void TailDuplicator::processPHI( 305 MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 306 DenseMap<unsigned, RegSubRegPair> &LocalVRMap, 307 SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies, 308 const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { 309 unsigned DefReg = MI->getOperand(0).getReg(); 310 unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); 311 assert(SrcOpIdx && "Unable to find matching PHI source?"); 312 unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); 313 unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg(); 314 const TargetRegisterClass *RC = MRI->getRegClass(DefReg); 315 LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg))); 316 317 // Insert a copy from source to the end of the block. The def register is the 318 // available value liveout of the block. 319 unsigned NewDef = MRI->createVirtualRegister(RC); 320 Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg))); 321 if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) 322 addSSAUpdateEntry(DefReg, NewDef, PredBB); 323 324 if (!Remove) 325 return; 326 327 // Remove PredBB from the PHI node. 328 MI->RemoveOperand(SrcOpIdx + 1); 329 MI->RemoveOperand(SrcOpIdx); 330 if (MI->getNumOperands() == 1) 331 MI->eraseFromParent(); 332 } 333 334 /// Duplicate a TailBB instruction to PredBB and update 335 /// the source operands due to earlier PHI translation. 336 void TailDuplicator::duplicateInstruction( 337 MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 338 MachineFunction &MF, 339 DenseMap<unsigned, RegSubRegPair> &LocalVRMap, 340 const DenseSet<unsigned> &UsedByPhi) { 341 MachineInstr *NewMI = TII->duplicate(MI, MF); 342 if (PreRegAlloc) { 343 for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) { 344 MachineOperand &MO = NewMI->getOperand(i); 345 if (!MO.isReg()) 346 continue; 347 unsigned Reg = MO.getReg(); 348 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 349 continue; 350 if (MO.isDef()) { 351 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 352 unsigned NewReg = MRI->createVirtualRegister(RC); 353 MO.setReg(NewReg); 354 LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); 355 if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) 356 addSSAUpdateEntry(Reg, NewReg, PredBB); 357 } else { 358 auto VI = LocalVRMap.find(Reg); 359 if (VI != LocalVRMap.end()) { 360 // Need to make sure that the register class of the mapped register 361 // will satisfy the constraints of the class of the register being 362 // replaced. 363 auto *OrigRC = MRI->getRegClass(Reg); 364 auto *MappedRC = MRI->getRegClass(VI->second.Reg); 365 const TargetRegisterClass *ConstrRC; 366 if (VI->second.SubReg != 0) { 367 ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC, 368 VI->second.SubReg); 369 if (ConstrRC) { 370 // The actual constraining (as in "find appropriate new class") 371 // is done by getMatchingSuperRegClass, so now we only need to 372 // change the class of the mapped register. 373 MRI->setRegClass(VI->second.Reg, ConstrRC); 374 } 375 } else { 376 // For mapped registers that do not have sub-registers, simply 377 // restrict their class to match the original one. 378 ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC); 379 } 380 381 if (ConstrRC) { 382 // If the class constraining succeeded, we can simply replace 383 // the old register with the mapped one. 384 MO.setReg(VI->second.Reg); 385 // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a 386 // sub-register, we need to compose the sub-register indices. 387 MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(), 388 VI->second.SubReg)); 389 } else { 390 // The direct replacement is not possible, due to failing register 391 // class constraints. An explicit COPY is necessary. Create one 392 // that can be reused 393 auto *NewRC = MI->getRegClassConstraint(i, TII, TRI); 394 if (NewRC == nullptr) 395 NewRC = OrigRC; 396 unsigned NewReg = MRI->createVirtualRegister(NewRC); 397 BuildMI(*PredBB, MI, MI->getDebugLoc(), 398 TII->get(TargetOpcode::COPY), NewReg) 399 .addReg(VI->second.Reg, 0, VI->second.SubReg); 400 LocalVRMap.erase(VI); 401 LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); 402 MO.setReg(NewReg); 403 // The composed VI.Reg:VI.SubReg is replaced with NewReg, which 404 // is equivalent to the whole register Reg. Hence, Reg:subreg 405 // is same as NewReg:subreg, so keep the sub-register index 406 // unchanged. 407 } 408 // Clear any kill flags from this operand. The new register could 409 // have uses after this one, so kills are not valid here. 410 MO.setIsKill(false); 411 } 412 } 413 } 414 } 415 PredBB->insert(PredBB->instr_end(), NewMI); 416 } 417 418 /// After FromBB is tail duplicated into its predecessor blocks, the successors 419 /// have gained new predecessors. Update the PHI instructions in them 420 /// accordingly. 421 void TailDuplicator::updateSuccessorsPHIs( 422 MachineBasicBlock *FromBB, bool isDead, 423 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 424 SmallSetVector<MachineBasicBlock *, 8> &Succs) { 425 for (SmallSetVector<MachineBasicBlock *, 8>::iterator SI = Succs.begin(), 426 SE = Succs.end(); 427 SI != SE; ++SI) { 428 MachineBasicBlock *SuccBB = *SI; 429 for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end(); 430 II != EE; ++II) { 431 if (!II->isPHI()) 432 break; 433 MachineInstrBuilder MIB(*FromBB->getParent(), II); 434 unsigned Idx = 0; 435 for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { 436 MachineOperand &MO = II->getOperand(i + 1); 437 if (MO.getMBB() == FromBB) { 438 Idx = i; 439 break; 440 } 441 } 442 443 assert(Idx != 0); 444 MachineOperand &MO0 = II->getOperand(Idx); 445 unsigned Reg = MO0.getReg(); 446 if (isDead) { 447 // Folded into the previous BB. 448 // There could be duplicate phi source entries. FIXME: Should sdisel 449 // or earlier pass fixed this? 450 for (unsigned i = II->getNumOperands() - 2; i != Idx; i -= 2) { 451 MachineOperand &MO = II->getOperand(i + 1); 452 if (MO.getMBB() == FromBB) { 453 II->RemoveOperand(i + 1); 454 II->RemoveOperand(i); 455 } 456 } 457 } else 458 Idx = 0; 459 460 // If Idx is set, the operands at Idx and Idx+1 must be removed. 461 // We reuse the location to avoid expensive RemoveOperand calls. 462 463 DenseMap<unsigned, AvailableValsTy>::iterator LI = 464 SSAUpdateVals.find(Reg); 465 if (LI != SSAUpdateVals.end()) { 466 // This register is defined in the tail block. 467 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 468 MachineBasicBlock *SrcBB = LI->second[j].first; 469 // If we didn't duplicate a bb into a particular predecessor, we 470 // might still have added an entry to SSAUpdateVals to correcly 471 // recompute SSA. If that case, avoid adding a dummy extra argument 472 // this PHI. 473 if (!SrcBB->isSuccessor(SuccBB)) 474 continue; 475 476 unsigned SrcReg = LI->second[j].second; 477 if (Idx != 0) { 478 II->getOperand(Idx).setReg(SrcReg); 479 II->getOperand(Idx + 1).setMBB(SrcBB); 480 Idx = 0; 481 } else { 482 MIB.addReg(SrcReg).addMBB(SrcBB); 483 } 484 } 485 } else { 486 // Live in tail block, must also be live in predecessors. 487 for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { 488 MachineBasicBlock *SrcBB = TDBBs[j]; 489 if (Idx != 0) { 490 II->getOperand(Idx).setReg(Reg); 491 II->getOperand(Idx + 1).setMBB(SrcBB); 492 Idx = 0; 493 } else { 494 MIB.addReg(Reg).addMBB(SrcBB); 495 } 496 } 497 } 498 if (Idx != 0) { 499 II->RemoveOperand(Idx + 1); 500 II->RemoveOperand(Idx); 501 } 502 } 503 } 504 } 505 506 /// Determine if it is profitable to duplicate this block. 507 bool TailDuplicator::shouldTailDuplicate(const MachineFunction &MF, 508 bool IsSimple, 509 MachineBasicBlock &TailBB) { 510 // Only duplicate blocks that end with unconditional branches. 511 if (TailBB.canFallThrough()) 512 return false; 513 514 // Don't try to tail-duplicate single-block loops. 515 if (TailBB.isSuccessor(&TailBB)) 516 return false; 517 518 // Set the limit on the cost to duplicate. When optimizing for size, 519 // duplicate only one, because one branch instruction can be eliminated to 520 // compensate for the duplication. 521 unsigned MaxDuplicateCount; 522 if (TailDuplicateSize.getNumOccurrences() == 0 && 523 // FIXME: Use Function::optForSize(). 524 MF.getFunction()->hasFnAttribute(Attribute::OptimizeForSize)) 525 MaxDuplicateCount = 1; 526 else 527 MaxDuplicateCount = TailDuplicateSize; 528 529 // If the target has hardware branch prediction that can handle indirect 530 // branches, duplicating them can often make them predictable when there 531 // are common paths through the code. The limit needs to be high enough 532 // to allow undoing the effects of tail merging and other optimizations 533 // that rearrange the predecessors of the indirect branch. 534 535 bool HasIndirectbr = false; 536 if (!TailBB.empty()) 537 HasIndirectbr = TailBB.back().isIndirectBranch(); 538 539 if (HasIndirectbr && PreRegAlloc) 540 MaxDuplicateCount = 20; 541 542 // Check the instructions in the block to determine whether tail-duplication 543 // is invalid or unlikely to be profitable. 544 unsigned InstrCount = 0; 545 for (MachineInstr &MI : TailBB) { 546 // Non-duplicable things shouldn't be tail-duplicated. 547 if (MI.isNotDuplicable()) 548 return false; 549 550 // Convergent instructions can be duplicated only if doing so doesn't add 551 // new control dependencies, which is what we're going to do here. 552 if (MI.isConvergent()) 553 return false; 554 555 // Do not duplicate 'return' instructions if this is a pre-regalloc run. 556 // A return may expand into a lot more instructions (e.g. reload of callee 557 // saved registers) after PEI. 558 if (PreRegAlloc && MI.isReturn()) 559 return false; 560 561 // Avoid duplicating calls before register allocation. Calls presents a 562 // barrier to register allocation so duplicating them may end up increasing 563 // spills. 564 if (PreRegAlloc && MI.isCall()) 565 return false; 566 567 if (!MI.isPHI() && !MI.isDebugValue()) 568 InstrCount += 1; 569 570 if (InstrCount > MaxDuplicateCount) 571 return false; 572 } 573 574 // Check if any of the successors of TailBB has a PHI node in which the 575 // value corresponding to TailBB uses a subregister. 576 // If a phi node uses a register paired with a subregister, the actual 577 // "value type" of the phi may differ from the type of the register without 578 // any subregisters. Due to a bug, tail duplication may add a new operand 579 // without a necessary subregister, producing an invalid code. This is 580 // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. 581 // Disable tail duplication for this case for now, until the problem is 582 // fixed. 583 for (auto SB : TailBB.successors()) { 584 for (auto &I : *SB) { 585 if (!I.isPHI()) 586 break; 587 unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB); 588 assert(Idx != 0); 589 MachineOperand &PU = I.getOperand(Idx); 590 if (PU.getSubReg() != 0) 591 return false; 592 } 593 } 594 595 if (HasIndirectbr && PreRegAlloc) 596 return true; 597 598 if (IsSimple) 599 return true; 600 601 if (!PreRegAlloc) 602 return true; 603 604 return canCompletelyDuplicateBB(TailBB); 605 } 606 607 /// True if this BB has only one unconditional jump. 608 bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) { 609 if (TailBB->succ_size() != 1) 610 return false; 611 if (TailBB->pred_empty()) 612 return false; 613 MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(); 614 if (I == TailBB->end()) 615 return true; 616 return I->isUnconditionalBranch(); 617 } 618 619 static bool bothUsedInPHI(const MachineBasicBlock &A, 620 SmallPtrSet<MachineBasicBlock *, 8> SuccsB) { 621 for (MachineBasicBlock *BB : A.successors()) 622 if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) 623 return true; 624 625 return false; 626 } 627 628 bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { 629 for (MachineBasicBlock *PredBB : BB.predecessors()) { 630 if (PredBB->succ_size() > 1) 631 return false; 632 633 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 634 SmallVector<MachineOperand, 4> PredCond; 635 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 636 return false; 637 638 if (!PredCond.empty()) 639 return false; 640 } 641 return true; 642 } 643 644 bool TailDuplicator::duplicateSimpleBB( 645 MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs, 646 const DenseSet<unsigned> &UsedByPhi, 647 SmallVectorImpl<MachineInstr *> &Copies) { 648 SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(), 649 TailBB->succ_end()); 650 SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 651 TailBB->pred_end()); 652 bool Changed = false; 653 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 654 PE = Preds.end(); 655 PI != PE; ++PI) { 656 MachineBasicBlock *PredBB = *PI; 657 658 if (PredBB->hasEHPadSuccessor()) 659 continue; 660 661 if (bothUsedInPHI(*PredBB, Succs)) 662 continue; 663 664 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 665 SmallVector<MachineOperand, 4> PredCond; 666 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 667 continue; 668 669 Changed = true; 670 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 671 << "From simple Succ: " << *TailBB); 672 673 MachineBasicBlock *NewTarget = *TailBB->succ_begin(); 674 MachineBasicBlock *NextBB = &*std::next(PredBB->getIterator()); 675 676 // Make PredFBB explicit. 677 if (PredCond.empty()) 678 PredFBB = PredTBB; 679 680 // Make fall through explicit. 681 if (!PredTBB) 682 PredTBB = NextBB; 683 if (!PredFBB) 684 PredFBB = NextBB; 685 686 // Redirect 687 if (PredFBB == TailBB) 688 PredFBB = NewTarget; 689 if (PredTBB == TailBB) 690 PredTBB = NewTarget; 691 692 // Make the branch unconditional if possible 693 if (PredTBB == PredFBB) { 694 PredCond.clear(); 695 PredFBB = nullptr; 696 } 697 698 // Avoid adding fall through branches. 699 if (PredFBB == NextBB) 700 PredFBB = nullptr; 701 if (PredTBB == NextBB && PredFBB == nullptr) 702 PredTBB = nullptr; 703 704 TII->RemoveBranch(*PredBB); 705 706 if (!PredBB->isSuccessor(NewTarget)) 707 PredBB->replaceSuccessor(TailBB, NewTarget); 708 else { 709 PredBB->removeSuccessor(TailBB, true); 710 assert(PredBB->succ_size() <= 1); 711 } 712 713 if (PredTBB) 714 TII->InsertBranch(*PredBB, PredTBB, PredFBB, PredCond, DebugLoc()); 715 716 TDBBs.push_back(PredBB); 717 } 718 return Changed; 719 } 720 721 /// If it is profitable, duplicate TailBB's contents in each 722 /// of its predecessors. 723 bool TailDuplicator::tailDuplicate(MachineFunction &MF, bool IsSimple, 724 MachineBasicBlock *TailBB, 725 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 726 SmallVectorImpl<MachineInstr *> &Copies) { 727 DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); 728 729 DenseSet<unsigned> UsedByPhi; 730 getRegsUsedByPHIs(*TailBB, &UsedByPhi); 731 732 if (IsSimple) 733 return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); 734 735 // Iterate through all the unique predecessors and tail-duplicate this 736 // block into them, if possible. Copying the list ahead of time also 737 // avoids trouble with the predecessor list reallocating. 738 bool Changed = false; 739 SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 740 TailBB->pred_end()); 741 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 742 PE = Preds.end(); 743 PI != PE; ++PI) { 744 MachineBasicBlock *PredBB = *PI; 745 746 assert(TailBB != PredBB && 747 "Single-block loop should have been rejected earlier!"); 748 // EH edges are ignored by AnalyzeBranch. 749 if (PredBB->succ_size() > 1) 750 continue; 751 752 MachineBasicBlock *PredTBB, *PredFBB; 753 SmallVector<MachineOperand, 4> PredCond; 754 if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)) 755 continue; 756 if (!PredCond.empty()) 757 continue; 758 // Don't duplicate into a fall-through predecessor (at least for now). 759 if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) 760 continue; 761 762 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 763 << "From Succ: " << *TailBB); 764 765 TDBBs.push_back(PredBB); 766 767 // Remove PredBB's unconditional branch. 768 TII->RemoveBranch(*PredBB); 769 770 if (RS && !TailBB->livein_empty()) { 771 // Update PredBB livein. 772 RS->enterBasicBlock(*PredBB); 773 if (!PredBB->empty()) 774 RS->forward(std::prev(PredBB->end())); 775 for (const auto &LI : TailBB->liveins()) { 776 if (!RS->isRegUsed(LI.PhysReg, false)) 777 // If a register is previously livein to the tail but it's not live 778 // at the end of predecessor BB, then it should be added to its 779 // livein list. 780 PredBB->addLiveIn(LI); 781 } 782 } 783 784 // Clone the contents of TailBB into PredBB. 785 DenseMap<unsigned, RegSubRegPair> LocalVRMap; 786 SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 787 // Use instr_iterator here to properly handle bundles, e.g. 788 // ARM Thumb2 IT block. 789 MachineBasicBlock::instr_iterator I = TailBB->instr_begin(); 790 while (I != TailBB->instr_end()) { 791 MachineInstr *MI = &*I; 792 ++I; 793 if (MI->isPHI()) { 794 // Replace the uses of the def of the PHI with the register coming 795 // from PredBB. 796 processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); 797 } else { 798 // Replace def of virtual registers with new registers, and update 799 // uses with PHI source register or the new registers. 800 duplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap, UsedByPhi); 801 } 802 } 803 appendCopies(PredBB, CopyInfos, Copies); 804 805 // Simplify 806 TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true); 807 808 NumInstrDups += TailBB->size() - 1; // subtract one for removed branch 809 810 // Update the CFG. 811 PredBB->removeSuccessor(PredBB->succ_begin()); 812 assert(PredBB->succ_empty() && 813 "TailDuplicate called on block with multiple successors!"); 814 for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(), 815 E = TailBB->succ_end(); 816 I != E; ++I) 817 PredBB->addSuccessor(*I, MBPI->getEdgeProbability(TailBB, I)); 818 819 Changed = true; 820 ++NumTailDups; 821 } 822 823 // If TailBB was duplicated into all its predecessors except for the prior 824 // block, which falls through unconditionally, move the contents of this 825 // block into the prior block. 826 MachineBasicBlock *PrevBB = &*std::prev(TailBB->getIterator()); 827 MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; 828 SmallVector<MachineOperand, 4> PriorCond; 829 // This has to check PrevBB->succ_size() because EH edges are ignored by 830 // AnalyzeBranch. 831 if (PrevBB->succ_size() == 1 && 832 !TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true) && 833 PriorCond.empty() && !PriorTBB && TailBB->pred_size() == 1 && 834 !TailBB->hasAddressTaken()) { 835 DEBUG(dbgs() << "\nMerging into block: " << *PrevBB 836 << "From MBB: " << *TailBB); 837 if (PreRegAlloc) { 838 DenseMap<unsigned, RegSubRegPair> LocalVRMap; 839 SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 840 MachineBasicBlock::iterator I = TailBB->begin(); 841 // Process PHI instructions first. 842 while (I != TailBB->end() && I->isPHI()) { 843 // Replace the uses of the def of the PHI with the register coming 844 // from PredBB. 845 MachineInstr *MI = &*I++; 846 processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); 847 } 848 849 // Now copy the non-PHI instructions. 850 while (I != TailBB->end()) { 851 // Replace def of virtual registers with new registers, and update 852 // uses with PHI source register or the new registers. 853 MachineInstr *MI = &*I++; 854 assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); 855 duplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap, UsedByPhi); 856 MI->eraseFromParent(); 857 } 858 appendCopies(PrevBB, CopyInfos, Copies); 859 } else { 860 // No PHIs to worry about, just splice the instructions over. 861 PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); 862 } 863 PrevBB->removeSuccessor(PrevBB->succ_begin()); 864 assert(PrevBB->succ_empty()); 865 PrevBB->transferSuccessors(TailBB); 866 TDBBs.push_back(PrevBB); 867 Changed = true; 868 } 869 870 // If this is after register allocation, there are no phis to fix. 871 if (!PreRegAlloc) 872 return Changed; 873 874 // If we made no changes so far, we are safe. 875 if (!Changed) 876 return Changed; 877 878 // Handle the nasty case in that we duplicated a block that is part of a loop 879 // into some but not all of its predecessors. For example: 880 // 1 -> 2 <-> 3 | 881 // \ | 882 // \---> rest | 883 // if we duplicate 2 into 1 but not into 3, we end up with 884 // 12 -> 3 <-> 2 -> rest | 885 // \ / | 886 // \----->-----/ | 887 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced 888 // with a phi in 3 (which now dominates 2). 889 // What we do here is introduce a copy in 3 of the register defined by the 890 // phi, just like when we are duplicating 2 into 3, but we don't copy any 891 // real instructions or remove the 3 -> 2 edge from the phi in 2. 892 for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(), 893 PE = Preds.end(); 894 PI != PE; ++PI) { 895 MachineBasicBlock *PredBB = *PI; 896 if (std::find(TDBBs.begin(), TDBBs.end(), PredBB) != TDBBs.end()) 897 continue; 898 899 // EH edges 900 if (PredBB->succ_size() != 1) 901 continue; 902 903 DenseMap<unsigned, RegSubRegPair> LocalVRMap; 904 SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 905 MachineBasicBlock::iterator I = TailBB->begin(); 906 // Process PHI instructions first. 907 while (I != TailBB->end() && I->isPHI()) { 908 // Replace the uses of the def of the PHI with the register coming 909 // from PredBB. 910 MachineInstr *MI = &*I++; 911 processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); 912 } 913 appendCopies(PredBB, CopyInfos, Copies); 914 } 915 916 return Changed; 917 } 918 919 /// At the end of the block \p MBB generate COPY instructions between registers 920 /// described by \p CopyInfos. Append resulting instructions to \p Copies. 921 void TailDuplicator::appendCopies(MachineBasicBlock *MBB, 922 SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos, 923 SmallVectorImpl<MachineInstr*> &Copies) { 924 MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); 925 const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY); 926 for (auto &CI : CopyInfos) { 927 auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first) 928 .addReg(CI.second.Reg, 0, CI.second.SubReg); 929 Copies.push_back(C); 930 } 931 } 932 933 /// Remove the specified dead machine basic block from the function, updating 934 /// the CFG. 935 void TailDuplicator::removeDeadBlock(MachineBasicBlock *MBB) { 936 assert(MBB->pred_empty() && "MBB must be dead!"); 937 DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); 938 939 // Remove all successors. 940 while (!MBB->succ_empty()) 941 MBB->removeSuccessor(MBB->succ_end() - 1); 942 943 // Remove the block. 944 MBB->eraseFromParent(); 945 } 946 947 } // End llvm namespace 948