1 //===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This utility class duplicates basic blocks ending in unconditional branches 11 // into the tails of their predecessors. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SetVector.h" 18 #include "llvm/ADT/SmallPtrSet.h" 19 #include "llvm/ADT/SmallVector.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/ADT/STLExtras.h" 22 #include "llvm/CodeGen/MachineBasicBlock.h" 23 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/CodeGen/MachineInstr.h" 26 #include "llvm/CodeGen/MachineInstrBuilder.h" 27 #include "llvm/CodeGen/MachineOperand.h" 28 #include "llvm/CodeGen/MachineRegisterInfo.h" 29 #include "llvm/CodeGen/MachineSSAUpdater.h" 30 #include "llvm/CodeGen/TailDuplicator.h" 31 #include "llvm/IR/DebugLoc.h" 32 #include "llvm/IR/Function.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include "llvm/Target/TargetInstrInfo.h" 38 #include "llvm/Target/TargetRegisterInfo.h" 39 #include "llvm/Target/TargetSubtargetInfo.h" 40 #include <algorithm> 41 #include <cassert> 42 #include <iterator> 43 #include <utility> 44 45 using namespace llvm; 46 47 #define DEBUG_TYPE "tailduplication" 48 49 STATISTIC(NumTails, "Number of tails duplicated"); 50 STATISTIC(NumTailDups, "Number of tail duplicated blocks"); 51 STATISTIC(NumTailDupAdded, 52 "Number of instructions added due to tail duplication"); 53 STATISTIC(NumTailDupRemoved, 54 "Number of instructions removed due to tail duplication"); 55 STATISTIC(NumDeadBlocks, "Number of dead blocks removed"); 56 STATISTIC(NumAddedPHIs, "Number of phis added"); 57 58 // Heuristic for tail duplication. 59 static cl::opt<unsigned> TailDuplicateSize( 60 "tail-dup-size", 61 cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2), 62 cl::Hidden); 63 64 static cl::opt<unsigned> TailDupIndirectBranchSize( 65 "tail-dup-indirect-size", 66 cl::desc("Maximum instructions to consider tail duplicating blocks that " 67 "end with indirect branches."), cl::init(20), 68 cl::Hidden); 69 70 static cl::opt<bool> 71 TailDupVerify("tail-dup-verify", 72 cl::desc("Verify sanity of PHI instructions during taildup"), 73 cl::init(false), cl::Hidden); 74 75 static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U), 76 cl::Hidden); 77 78 void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc, 79 const MachineBranchProbabilityInfo *MBPIin, 80 bool LayoutModeIn, unsigned TailDupSizeIn) { 81 MF = &MFin; 82 TII = MF->getSubtarget().getInstrInfo(); 83 TRI = MF->getSubtarget().getRegisterInfo(); 84 MRI = &MF->getRegInfo(); 85 MMI = &MF->getMMI(); 86 MBPI = MBPIin; 87 TailDupSize = TailDupSizeIn; 88 89 assert(MBPI != nullptr && "Machine Branch Probability Info required"); 90 91 LayoutMode = LayoutModeIn; 92 this->PreRegAlloc = PreRegAlloc; 93 } 94 95 static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) { 96 for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) { 97 MachineBasicBlock *MBB = &*I; 98 SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(), 99 MBB->pred_end()); 100 MachineBasicBlock::iterator MI = MBB->begin(); 101 while (MI != MBB->end()) { 102 if (!MI->isPHI()) 103 break; 104 for (MachineBasicBlock *PredBB : Preds) { 105 bool Found = false; 106 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 107 MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); 108 if (PHIBB == PredBB) { 109 Found = true; 110 break; 111 } 112 } 113 if (!Found) { 114 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 115 dbgs() << " missing input from predecessor BB#" 116 << PredBB->getNumber() << '\n'; 117 llvm_unreachable(nullptr); 118 } 119 } 120 121 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) { 122 MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB(); 123 if (CheckExtra && !Preds.count(PHIBB)) { 124 dbgs() << "Warning: malformed PHI in BB#" << MBB->getNumber() << ": " 125 << *MI; 126 dbgs() << " extra input from predecessor BB#" << PHIBB->getNumber() 127 << '\n'; 128 llvm_unreachable(nullptr); 129 } 130 if (PHIBB->getNumber() < 0) { 131 dbgs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI; 132 dbgs() << " non-existing BB#" << PHIBB->getNumber() << '\n'; 133 llvm_unreachable(nullptr); 134 } 135 } 136 ++MI; 137 } 138 } 139 } 140 141 /// Tail duplicate the block and cleanup. 142 /// \p IsSimple - return value of isSimpleBB 143 /// \p MBB - block to be duplicated 144 /// \p ForcedLayoutPred - If non-null, treat this block as the layout 145 /// predecessor, instead of using the ordering in MF 146 /// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of 147 /// all Preds that received a copy of \p MBB. 148 /// \p RemovalCallback - if non-null, called just before MBB is deleted. 149 bool TailDuplicator::tailDuplicateAndUpdate( 150 bool IsSimple, MachineBasicBlock *MBB, 151 MachineBasicBlock *ForcedLayoutPred, 152 SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds, 153 function_ref<void(MachineBasicBlock *)> *RemovalCallback) { 154 // Save the successors list. 155 SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(), 156 MBB->succ_end()); 157 158 SmallVector<MachineBasicBlock *, 8> TDBBs; 159 SmallVector<MachineInstr *, 16> Copies; 160 if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred, TDBBs, Copies)) 161 return false; 162 163 ++NumTails; 164 165 SmallVector<MachineInstr *, 8> NewPHIs; 166 MachineSSAUpdater SSAUpdate(*MF, &NewPHIs); 167 168 // TailBB's immediate successors are now successors of those predecessors 169 // which duplicated TailBB. Add the predecessors as sources to the PHI 170 // instructions. 171 bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken(); 172 if (PreRegAlloc) 173 updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs); 174 175 // If it is dead, remove it. 176 if (isDead) { 177 NumTailDupRemoved += MBB->size(); 178 removeDeadBlock(MBB, RemovalCallback); 179 ++NumDeadBlocks; 180 } 181 182 // Update SSA form. 183 if (!SSAUpdateVRs.empty()) { 184 for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) { 185 unsigned VReg = SSAUpdateVRs[i]; 186 SSAUpdate.Initialize(VReg); 187 188 // If the original definition is still around, add it as an available 189 // value. 190 MachineInstr *DefMI = MRI->getVRegDef(VReg); 191 MachineBasicBlock *DefBB = nullptr; 192 if (DefMI) { 193 DefBB = DefMI->getParent(); 194 SSAUpdate.AddAvailableValue(DefBB, VReg); 195 } 196 197 // Add the new vregs as available values. 198 DenseMap<unsigned, AvailableValsTy>::iterator LI = 199 SSAUpdateVals.find(VReg); 200 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 201 MachineBasicBlock *SrcBB = LI->second[j].first; 202 unsigned SrcReg = LI->second[j].second; 203 SSAUpdate.AddAvailableValue(SrcBB, SrcReg); 204 } 205 206 // Rewrite uses that are outside of the original def's block. 207 MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg); 208 while (UI != MRI->use_end()) { 209 MachineOperand &UseMO = *UI; 210 MachineInstr *UseMI = UseMO.getParent(); 211 ++UI; 212 if (UseMI->isDebugValue()) { 213 // SSAUpdate can replace the use with an undef. That creates 214 // a debug instruction that is a kill. 215 // FIXME: Should it SSAUpdate job to delete debug instructions 216 // instead of replacing the use with undef? 217 UseMI->eraseFromParent(); 218 continue; 219 } 220 if (UseMI->getParent() == DefBB && !UseMI->isPHI()) 221 continue; 222 SSAUpdate.RewriteUse(UseMO); 223 } 224 } 225 226 SSAUpdateVRs.clear(); 227 SSAUpdateVals.clear(); 228 } 229 230 // Eliminate some of the copies inserted by tail duplication to maintain 231 // SSA form. 232 for (unsigned i = 0, e = Copies.size(); i != e; ++i) { 233 MachineInstr *Copy = Copies[i]; 234 if (!Copy->isCopy()) 235 continue; 236 unsigned Dst = Copy->getOperand(0).getReg(); 237 unsigned Src = Copy->getOperand(1).getReg(); 238 if (MRI->hasOneNonDBGUse(Src) && 239 MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) { 240 // Copy is the only use. Do trivial copy propagation here. 241 MRI->replaceRegWith(Dst, Src); 242 Copy->eraseFromParent(); 243 } 244 } 245 246 if (NewPHIs.size()) 247 NumAddedPHIs += NewPHIs.size(); 248 249 if (DuplicatedPreds) 250 *DuplicatedPreds = std::move(TDBBs); 251 252 return true; 253 } 254 255 /// Look for small blocks that are unconditionally branched to and do not fall 256 /// through. Tail-duplicate their instructions into their predecessors to 257 /// eliminate (dynamic) branches. 258 bool TailDuplicator::tailDuplicateBlocks() { 259 bool MadeChange = false; 260 261 if (PreRegAlloc && TailDupVerify) { 262 DEBUG(dbgs() << "\n*** Before tail-duplicating\n"); 263 VerifyPHIs(*MF, true); 264 } 265 266 for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) { 267 MachineBasicBlock *MBB = &*I++; 268 269 if (NumTails == TailDupLimit) 270 break; 271 272 bool IsSimple = isSimpleBB(MBB); 273 274 if (!shouldTailDuplicate(IsSimple, *MBB)) 275 continue; 276 277 MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr); 278 } 279 280 if (PreRegAlloc && TailDupVerify) 281 VerifyPHIs(*MF, false); 282 283 return MadeChange; 284 } 285 286 static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB, 287 const MachineRegisterInfo *MRI) { 288 for (MachineInstr &UseMI : MRI->use_instructions(Reg)) { 289 if (UseMI.isDebugValue()) 290 continue; 291 if (UseMI.getParent() != BB) 292 return true; 293 } 294 return false; 295 } 296 297 static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) { 298 for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) 299 if (MI->getOperand(i + 1).getMBB() == SrcBB) 300 return i; 301 return 0; 302 } 303 304 // Remember which registers are used by phis in this block. This is 305 // used to determine which registers are liveout while modifying the 306 // block (which is why we need to copy the information). 307 static void getRegsUsedByPHIs(const MachineBasicBlock &BB, 308 DenseSet<unsigned> *UsedByPhi) { 309 for (const auto &MI : BB) { 310 if (!MI.isPHI()) 311 break; 312 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 313 unsigned SrcReg = MI.getOperand(i).getReg(); 314 UsedByPhi->insert(SrcReg); 315 } 316 } 317 } 318 319 /// Add a definition and source virtual registers pair for SSA update. 320 void TailDuplicator::addSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, 321 MachineBasicBlock *BB) { 322 DenseMap<unsigned, AvailableValsTy>::iterator LI = 323 SSAUpdateVals.find(OrigReg); 324 if (LI != SSAUpdateVals.end()) 325 LI->second.push_back(std::make_pair(BB, NewReg)); 326 else { 327 AvailableValsTy Vals; 328 Vals.push_back(std::make_pair(BB, NewReg)); 329 SSAUpdateVals.insert(std::make_pair(OrigReg, Vals)); 330 SSAUpdateVRs.push_back(OrigReg); 331 } 332 } 333 334 /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the 335 /// source register that's contributed by PredBB and update SSA update map. 336 void TailDuplicator::processPHI( 337 MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 338 DenseMap<unsigned, RegSubRegPair> &LocalVRMap, 339 SmallVectorImpl<std::pair<unsigned, RegSubRegPair>> &Copies, 340 const DenseSet<unsigned> &RegsUsedByPhi, bool Remove) { 341 unsigned DefReg = MI->getOperand(0).getReg(); 342 unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); 343 assert(SrcOpIdx && "Unable to find matching PHI source?"); 344 unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg(); 345 unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg(); 346 const TargetRegisterClass *RC = MRI->getRegClass(DefReg); 347 LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg))); 348 349 // Insert a copy from source to the end of the block. The def register is the 350 // available value liveout of the block. 351 unsigned NewDef = MRI->createVirtualRegister(RC); 352 Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg))); 353 if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg)) 354 addSSAUpdateEntry(DefReg, NewDef, PredBB); 355 356 if (!Remove) 357 return; 358 359 // Remove PredBB from the PHI node. 360 MI->RemoveOperand(SrcOpIdx + 1); 361 MI->RemoveOperand(SrcOpIdx); 362 if (MI->getNumOperands() == 1) 363 MI->eraseFromParent(); 364 } 365 366 /// Duplicate a TailBB instruction to PredBB and update 367 /// the source operands due to earlier PHI translation. 368 void TailDuplicator::duplicateInstruction( 369 MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, 370 DenseMap<unsigned, RegSubRegPair> &LocalVRMap, 371 const DenseSet<unsigned> &UsedByPhi) { 372 MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI); 373 if (PreRegAlloc) { 374 for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) { 375 MachineOperand &MO = NewMI.getOperand(i); 376 if (!MO.isReg()) 377 continue; 378 unsigned Reg = MO.getReg(); 379 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 380 continue; 381 if (MO.isDef()) { 382 const TargetRegisterClass *RC = MRI->getRegClass(Reg); 383 unsigned NewReg = MRI->createVirtualRegister(RC); 384 MO.setReg(NewReg); 385 LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); 386 if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg)) 387 addSSAUpdateEntry(Reg, NewReg, PredBB); 388 } else { 389 auto VI = LocalVRMap.find(Reg); 390 if (VI != LocalVRMap.end()) { 391 // Need to make sure that the register class of the mapped register 392 // will satisfy the constraints of the class of the register being 393 // replaced. 394 auto *OrigRC = MRI->getRegClass(Reg); 395 auto *MappedRC = MRI->getRegClass(VI->second.Reg); 396 const TargetRegisterClass *ConstrRC; 397 if (VI->second.SubReg != 0) { 398 ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC, 399 VI->second.SubReg); 400 if (ConstrRC) { 401 // The actual constraining (as in "find appropriate new class") 402 // is done by getMatchingSuperRegClass, so now we only need to 403 // change the class of the mapped register. 404 MRI->setRegClass(VI->second.Reg, ConstrRC); 405 } 406 } else { 407 // For mapped registers that do not have sub-registers, simply 408 // restrict their class to match the original one. 409 ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC); 410 } 411 412 if (ConstrRC) { 413 // If the class constraining succeeded, we can simply replace 414 // the old register with the mapped one. 415 MO.setReg(VI->second.Reg); 416 // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a 417 // sub-register, we need to compose the sub-register indices. 418 MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(), 419 VI->second.SubReg)); 420 } else { 421 // The direct replacement is not possible, due to failing register 422 // class constraints. An explicit COPY is necessary. Create one 423 // that can be reused 424 auto *NewRC = MI->getRegClassConstraint(i, TII, TRI); 425 if (NewRC == nullptr) 426 NewRC = OrigRC; 427 unsigned NewReg = MRI->createVirtualRegister(NewRC); 428 BuildMI(*PredBB, MI, MI->getDebugLoc(), 429 TII->get(TargetOpcode::COPY), NewReg) 430 .addReg(VI->second.Reg, 0, VI->second.SubReg); 431 LocalVRMap.erase(VI); 432 LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0))); 433 MO.setReg(NewReg); 434 // The composed VI.Reg:VI.SubReg is replaced with NewReg, which 435 // is equivalent to the whole register Reg. Hence, Reg:subreg 436 // is same as NewReg:subreg, so keep the sub-register index 437 // unchanged. 438 } 439 // Clear any kill flags from this operand. The new register could 440 // have uses after this one, so kills are not valid here. 441 MO.setIsKill(false); 442 } 443 } 444 } 445 } 446 } 447 448 /// After FromBB is tail duplicated into its predecessor blocks, the successors 449 /// have gained new predecessors. Update the PHI instructions in them 450 /// accordingly. 451 void TailDuplicator::updateSuccessorsPHIs( 452 MachineBasicBlock *FromBB, bool isDead, 453 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 454 SmallSetVector<MachineBasicBlock *, 8> &Succs) { 455 for (MachineBasicBlock *SuccBB : Succs) { 456 for (MachineInstr &MI : *SuccBB) { 457 if (!MI.isPHI()) 458 break; 459 MachineInstrBuilder MIB(*FromBB->getParent(), MI); 460 unsigned Idx = 0; 461 for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) { 462 MachineOperand &MO = MI.getOperand(i + 1); 463 if (MO.getMBB() == FromBB) { 464 Idx = i; 465 break; 466 } 467 } 468 469 assert(Idx != 0); 470 MachineOperand &MO0 = MI.getOperand(Idx); 471 unsigned Reg = MO0.getReg(); 472 if (isDead) { 473 // Folded into the previous BB. 474 // There could be duplicate phi source entries. FIXME: Should sdisel 475 // or earlier pass fixed this? 476 for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) { 477 MachineOperand &MO = MI.getOperand(i + 1); 478 if (MO.getMBB() == FromBB) { 479 MI.RemoveOperand(i + 1); 480 MI.RemoveOperand(i); 481 } 482 } 483 } else 484 Idx = 0; 485 486 // If Idx is set, the operands at Idx and Idx+1 must be removed. 487 // We reuse the location to avoid expensive RemoveOperand calls. 488 489 DenseMap<unsigned, AvailableValsTy>::iterator LI = 490 SSAUpdateVals.find(Reg); 491 if (LI != SSAUpdateVals.end()) { 492 // This register is defined in the tail block. 493 for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) { 494 MachineBasicBlock *SrcBB = LI->second[j].first; 495 // If we didn't duplicate a bb into a particular predecessor, we 496 // might still have added an entry to SSAUpdateVals to correcly 497 // recompute SSA. If that case, avoid adding a dummy extra argument 498 // this PHI. 499 if (!SrcBB->isSuccessor(SuccBB)) 500 continue; 501 502 unsigned SrcReg = LI->second[j].second; 503 if (Idx != 0) { 504 MI.getOperand(Idx).setReg(SrcReg); 505 MI.getOperand(Idx + 1).setMBB(SrcBB); 506 Idx = 0; 507 } else { 508 MIB.addReg(SrcReg).addMBB(SrcBB); 509 } 510 } 511 } else { 512 // Live in tail block, must also be live in predecessors. 513 for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) { 514 MachineBasicBlock *SrcBB = TDBBs[j]; 515 if (Idx != 0) { 516 MI.getOperand(Idx).setReg(Reg); 517 MI.getOperand(Idx + 1).setMBB(SrcBB); 518 Idx = 0; 519 } else { 520 MIB.addReg(Reg).addMBB(SrcBB); 521 } 522 } 523 } 524 if (Idx != 0) { 525 MI.RemoveOperand(Idx + 1); 526 MI.RemoveOperand(Idx); 527 } 528 } 529 } 530 } 531 532 /// Determine if it is profitable to duplicate this block. 533 bool TailDuplicator::shouldTailDuplicate(bool IsSimple, 534 MachineBasicBlock &TailBB) { 535 // When doing tail-duplication during layout, the block ordering is in flux, 536 // so canFallThrough returns a result based on incorrect information and 537 // should just be ignored. 538 if (!LayoutMode && TailBB.canFallThrough()) 539 return false; 540 541 // Don't try to tail-duplicate single-block loops. 542 if (TailBB.isSuccessor(&TailBB)) 543 return false; 544 545 // Set the limit on the cost to duplicate. When optimizing for size, 546 // duplicate only one, because one branch instruction can be eliminated to 547 // compensate for the duplication. 548 unsigned MaxDuplicateCount; 549 if (TailDupSize == 0 && 550 TailDuplicateSize.getNumOccurrences() == 0 && 551 MF->getFunction()->optForSize()) 552 MaxDuplicateCount = 1; 553 else if (TailDupSize == 0) 554 MaxDuplicateCount = TailDuplicateSize; 555 else 556 MaxDuplicateCount = TailDupSize; 557 558 // If the block to be duplicated ends in an unanalyzable fallthrough, don't 559 // duplicate it. 560 // A similar check is necessary in MachineBlockPlacement to make sure pairs of 561 // blocks with unanalyzable fallthrough get layed out contiguously. 562 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 563 SmallVector<MachineOperand, 4> PredCond; 564 if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) && 565 TailBB.canFallThrough()) 566 return false; 567 568 // If the target has hardware branch prediction that can handle indirect 569 // branches, duplicating them can often make them predictable when there 570 // are common paths through the code. The limit needs to be high enough 571 // to allow undoing the effects of tail merging and other optimizations 572 // that rearrange the predecessors of the indirect branch. 573 574 bool HasIndirectbr = false; 575 if (!TailBB.empty()) 576 HasIndirectbr = TailBB.back().isIndirectBranch(); 577 578 if (HasIndirectbr && PreRegAlloc) 579 MaxDuplicateCount = TailDupIndirectBranchSize; 580 581 // Check the instructions in the block to determine whether tail-duplication 582 // is invalid or unlikely to be profitable. 583 unsigned InstrCount = 0; 584 for (MachineInstr &MI : TailBB) { 585 // Non-duplicable things shouldn't be tail-duplicated. 586 if (MI.isNotDuplicable()) 587 return false; 588 589 // Convergent instructions can be duplicated only if doing so doesn't add 590 // new control dependencies, which is what we're going to do here. 591 if (MI.isConvergent()) 592 return false; 593 594 // Do not duplicate 'return' instructions if this is a pre-regalloc run. 595 // A return may expand into a lot more instructions (e.g. reload of callee 596 // saved registers) after PEI. 597 if (PreRegAlloc && MI.isReturn()) 598 return false; 599 600 // Avoid duplicating calls before register allocation. Calls presents a 601 // barrier to register allocation so duplicating them may end up increasing 602 // spills. 603 if (PreRegAlloc && MI.isCall()) 604 return false; 605 606 if (!MI.isPHI() && !MI.isDebugValue()) 607 InstrCount += 1; 608 609 if (InstrCount > MaxDuplicateCount) 610 return false; 611 } 612 613 // Check if any of the successors of TailBB has a PHI node in which the 614 // value corresponding to TailBB uses a subregister. 615 // If a phi node uses a register paired with a subregister, the actual 616 // "value type" of the phi may differ from the type of the register without 617 // any subregisters. Due to a bug, tail duplication may add a new operand 618 // without a necessary subregister, producing an invalid code. This is 619 // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll. 620 // Disable tail duplication for this case for now, until the problem is 621 // fixed. 622 for (auto SB : TailBB.successors()) { 623 for (auto &I : *SB) { 624 if (!I.isPHI()) 625 break; 626 unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB); 627 assert(Idx != 0); 628 MachineOperand &PU = I.getOperand(Idx); 629 if (PU.getSubReg() != 0) 630 return false; 631 } 632 } 633 634 if (HasIndirectbr && PreRegAlloc) 635 return true; 636 637 if (IsSimple) 638 return true; 639 640 if (!PreRegAlloc) 641 return true; 642 643 return canCompletelyDuplicateBB(TailBB); 644 } 645 646 /// True if this BB has only one unconditional jump. 647 bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) { 648 if (TailBB->succ_size() != 1) 649 return false; 650 if (TailBB->pred_empty()) 651 return false; 652 MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(); 653 if (I == TailBB->end()) 654 return true; 655 return I->isUnconditionalBranch(); 656 } 657 658 static bool bothUsedInPHI(const MachineBasicBlock &A, 659 const SmallPtrSet<MachineBasicBlock *, 8> &SuccsB) { 660 for (MachineBasicBlock *BB : A.successors()) 661 if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI()) 662 return true; 663 664 return false; 665 } 666 667 bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) { 668 for (MachineBasicBlock *PredBB : BB.predecessors()) { 669 if (PredBB->succ_size() > 1) 670 return false; 671 672 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 673 SmallVector<MachineOperand, 4> PredCond; 674 if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) 675 return false; 676 677 if (!PredCond.empty()) 678 return false; 679 } 680 return true; 681 } 682 683 bool TailDuplicator::duplicateSimpleBB( 684 MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs, 685 const DenseSet<unsigned> &UsedByPhi, 686 SmallVectorImpl<MachineInstr *> &Copies) { 687 SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(), 688 TailBB->succ_end()); 689 SmallVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 690 TailBB->pred_end()); 691 bool Changed = false; 692 for (MachineBasicBlock *PredBB : Preds) { 693 if (PredBB->hasEHPadSuccessor()) 694 continue; 695 696 if (bothUsedInPHI(*PredBB, Succs)) 697 continue; 698 699 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 700 SmallVector<MachineOperand, 4> PredCond; 701 if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) 702 continue; 703 704 Changed = true; 705 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 706 << "From simple Succ: " << *TailBB); 707 708 MachineBasicBlock *NewTarget = *TailBB->succ_begin(); 709 MachineBasicBlock *NextBB = PredBB->getNextNode(); 710 711 // Make PredFBB explicit. 712 if (PredCond.empty()) 713 PredFBB = PredTBB; 714 715 // Make fall through explicit. 716 if (!PredTBB) 717 PredTBB = NextBB; 718 if (!PredFBB) 719 PredFBB = NextBB; 720 721 // Redirect 722 if (PredFBB == TailBB) 723 PredFBB = NewTarget; 724 if (PredTBB == TailBB) 725 PredTBB = NewTarget; 726 727 // Make the branch unconditional if possible 728 if (PredTBB == PredFBB) { 729 PredCond.clear(); 730 PredFBB = nullptr; 731 } 732 733 // Avoid adding fall through branches. 734 if (PredFBB == NextBB) 735 PredFBB = nullptr; 736 if (PredTBB == NextBB && PredFBB == nullptr) 737 PredTBB = nullptr; 738 739 auto DL = PredBB->findBranchDebugLoc(); 740 TII->removeBranch(*PredBB); 741 742 if (!PredBB->isSuccessor(NewTarget)) 743 PredBB->replaceSuccessor(TailBB, NewTarget); 744 else { 745 PredBB->removeSuccessor(TailBB, true); 746 assert(PredBB->succ_size() <= 1); 747 } 748 749 if (PredTBB) 750 TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DL); 751 752 TDBBs.push_back(PredBB); 753 } 754 return Changed; 755 } 756 757 bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB, 758 MachineBasicBlock *PredBB) { 759 // EH edges are ignored by analyzeBranch. 760 if (PredBB->succ_size() > 1) 761 return false; 762 763 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 764 SmallVector<MachineOperand, 4> PredCond; 765 if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond)) 766 return false; 767 if (!PredCond.empty()) 768 return false; 769 return true; 770 } 771 772 /// If it is profitable, duplicate TailBB's contents in each 773 /// of its predecessors. 774 /// \p IsSimple result of isSimpleBB 775 /// \p TailBB Block to be duplicated. 776 /// \p ForcedLayoutPred When non-null, use this block as the layout predecessor 777 /// instead of the previous block in MF's order. 778 /// \p TDBBs A vector to keep track of all blocks tail-duplicated 779 /// into. 780 /// \p Copies A vector of copy instructions inserted. Used later to 781 /// walk all the inserted copies and remove redundant ones. 782 bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB, 783 MachineBasicBlock *ForcedLayoutPred, 784 SmallVectorImpl<MachineBasicBlock *> &TDBBs, 785 SmallVectorImpl<MachineInstr *> &Copies) { 786 DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); 787 788 DenseSet<unsigned> UsedByPhi; 789 getRegsUsedByPHIs(*TailBB, &UsedByPhi); 790 791 if (IsSimple) 792 return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies); 793 794 // Iterate through all the unique predecessors and tail-duplicate this 795 // block into them, if possible. Copying the list ahead of time also 796 // avoids trouble with the predecessor list reallocating. 797 bool Changed = false; 798 SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(), 799 TailBB->pred_end()); 800 for (MachineBasicBlock *PredBB : Preds) { 801 assert(TailBB != PredBB && 802 "Single-block loop should have been rejected earlier!"); 803 804 if (!canTailDuplicate(TailBB, PredBB)) 805 continue; 806 807 // Don't duplicate into a fall-through predecessor (at least for now). 808 bool IsLayoutSuccessor = false; 809 if (ForcedLayoutPred) 810 IsLayoutSuccessor = (ForcedLayoutPred == PredBB); 811 else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough()) 812 IsLayoutSuccessor = true; 813 if (IsLayoutSuccessor) 814 continue; 815 816 DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB 817 << "From Succ: " << *TailBB); 818 819 TDBBs.push_back(PredBB); 820 821 // Remove PredBB's unconditional branch. 822 TII->removeBranch(*PredBB); 823 824 // Clone the contents of TailBB into PredBB. 825 DenseMap<unsigned, RegSubRegPair> LocalVRMap; 826 SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 827 for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end(); 828 I != E; /* empty */) { 829 MachineInstr *MI = &*I; 830 ++I; 831 if (MI->isPHI()) { 832 // Replace the uses of the def of the PHI with the register coming 833 // from PredBB. 834 processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true); 835 } else { 836 // Replace def of virtual registers with new registers, and update 837 // uses with PHI source register or the new registers. 838 duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi); 839 } 840 } 841 appendCopies(PredBB, CopyInfos, Copies); 842 843 // Simplify 844 MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr; 845 SmallVector<MachineOperand, 4> PredCond; 846 TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond); 847 848 NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch 849 850 // Update the CFG. 851 PredBB->removeSuccessor(PredBB->succ_begin()); 852 assert(PredBB->succ_empty() && 853 "TailDuplicate called on block with multiple successors!"); 854 for (MachineBasicBlock *Succ : TailBB->successors()) 855 PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ)); 856 857 Changed = true; 858 ++NumTailDups; 859 } 860 861 // If TailBB was duplicated into all its predecessors except for the prior 862 // block, which falls through unconditionally, move the contents of this 863 // block into the prior block. 864 MachineBasicBlock *PrevBB = ForcedLayoutPred; 865 if (!PrevBB) 866 PrevBB = &*std::prev(TailBB->getIterator()); 867 MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr; 868 SmallVector<MachineOperand, 4> PriorCond; 869 // This has to check PrevBB->succ_size() because EH edges are ignored by 870 // analyzeBranch. 871 if (PrevBB->succ_size() == 1 && 872 // Layout preds are not always CFG preds. Check. 873 *PrevBB->succ_begin() == TailBB && 874 !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) && 875 PriorCond.empty() && 876 (!PriorTBB || PriorTBB == TailBB) && 877 TailBB->pred_size() == 1 && 878 !TailBB->hasAddressTaken()) { 879 DEBUG(dbgs() << "\nMerging into block: " << *PrevBB 880 << "From MBB: " << *TailBB); 881 // There may be a branch to the layout successor. This is unlikely but it 882 // happens. The correct thing to do is to remove the branch before 883 // duplicating the instructions in all cases. 884 TII->removeBranch(*PrevBB); 885 if (PreRegAlloc) { 886 DenseMap<unsigned, RegSubRegPair> LocalVRMap; 887 SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 888 MachineBasicBlock::iterator I = TailBB->begin(); 889 // Process PHI instructions first. 890 while (I != TailBB->end() && I->isPHI()) { 891 // Replace the uses of the def of the PHI with the register coming 892 // from PredBB. 893 MachineInstr *MI = &*I++; 894 processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true); 895 } 896 897 // Now copy the non-PHI instructions. 898 while (I != TailBB->end()) { 899 // Replace def of virtual registers with new registers, and update 900 // uses with PHI source register or the new registers. 901 MachineInstr *MI = &*I++; 902 assert(!MI->isBundle() && "Not expecting bundles before regalloc!"); 903 duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi); 904 MI->eraseFromParent(); 905 } 906 appendCopies(PrevBB, CopyInfos, Copies); 907 } else { 908 TII->removeBranch(*PrevBB); 909 // No PHIs to worry about, just splice the instructions over. 910 PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end()); 911 } 912 PrevBB->removeSuccessor(PrevBB->succ_begin()); 913 assert(PrevBB->succ_empty()); 914 PrevBB->transferSuccessors(TailBB); 915 TDBBs.push_back(PrevBB); 916 Changed = true; 917 } 918 919 // If this is after register allocation, there are no phis to fix. 920 if (!PreRegAlloc) 921 return Changed; 922 923 // If we made no changes so far, we are safe. 924 if (!Changed) 925 return Changed; 926 927 // Handle the nasty case in that we duplicated a block that is part of a loop 928 // into some but not all of its predecessors. For example: 929 // 1 -> 2 <-> 3 | 930 // \ | 931 // \---> rest | 932 // if we duplicate 2 into 1 but not into 3, we end up with 933 // 12 -> 3 <-> 2 -> rest | 934 // \ / | 935 // \----->-----/ | 936 // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced 937 // with a phi in 3 (which now dominates 2). 938 // What we do here is introduce a copy in 3 of the register defined by the 939 // phi, just like when we are duplicating 2 into 3, but we don't copy any 940 // real instructions or remove the 3 -> 2 edge from the phi in 2. 941 for (MachineBasicBlock *PredBB : Preds) { 942 if (is_contained(TDBBs, PredBB)) 943 continue; 944 945 // EH edges 946 if (PredBB->succ_size() != 1) 947 continue; 948 949 DenseMap<unsigned, RegSubRegPair> LocalVRMap; 950 SmallVector<std::pair<unsigned, RegSubRegPair>, 4> CopyInfos; 951 MachineBasicBlock::iterator I = TailBB->begin(); 952 // Process PHI instructions first. 953 while (I != TailBB->end() && I->isPHI()) { 954 // Replace the uses of the def of the PHI with the register coming 955 // from PredBB. 956 MachineInstr *MI = &*I++; 957 processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false); 958 } 959 appendCopies(PredBB, CopyInfos, Copies); 960 } 961 962 return Changed; 963 } 964 965 /// At the end of the block \p MBB generate COPY instructions between registers 966 /// described by \p CopyInfos. Append resulting instructions to \p Copies. 967 void TailDuplicator::appendCopies(MachineBasicBlock *MBB, 968 SmallVectorImpl<std::pair<unsigned,RegSubRegPair>> &CopyInfos, 969 SmallVectorImpl<MachineInstr*> &Copies) { 970 MachineBasicBlock::iterator Loc = MBB->getFirstTerminator(); 971 const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY); 972 for (auto &CI : CopyInfos) { 973 auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first) 974 .addReg(CI.second.Reg, 0, CI.second.SubReg); 975 Copies.push_back(C); 976 } 977 } 978 979 /// Remove the specified dead machine basic block from the function, updating 980 /// the CFG. 981 void TailDuplicator::removeDeadBlock( 982 MachineBasicBlock *MBB, 983 function_ref<void(MachineBasicBlock *)> *RemovalCallback) { 984 assert(MBB->pred_empty() && "MBB must be dead!"); 985 DEBUG(dbgs() << "\nRemoving MBB: " << *MBB); 986 987 if (RemovalCallback) 988 (*RemovalCallback)(MBB); 989 990 // Remove all successors. 991 while (!MBB->succ_empty()) 992 MBB->removeSuccessor(MBB->succ_end() - 1); 993 994 // Remove the block. 995 MBB->eraseFromParent(); 996 } 997