1 //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the generic RegisterCoalescer interface which 11 // is used as the common interface used by all clients and 12 // implementations of register coalescing. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "regalloc" 17 #include "RegisterCoalescer.h" 18 #include "LiveDebugVariables.h" 19 #include "RegisterClassInfo.h" 20 #include "VirtRegMap.h" 21 22 #include "llvm/Pass.h" 23 #include "llvm/Value.h" 24 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 25 #include "llvm/CodeGen/MachineInstr.h" 26 #include "llvm/CodeGen/MachineRegisterInfo.h" 27 #include "llvm/Target/TargetInstrInfo.h" 28 #include "llvm/Target/TargetRegisterInfo.h" 29 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 30 #include "llvm/Analysis/AliasAnalysis.h" 31 #include "llvm/CodeGen/MachineFrameInfo.h" 32 #include "llvm/CodeGen/MachineInstr.h" 33 #include "llvm/CodeGen/MachineLoopInfo.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/Passes.h" 36 #include "llvm/Target/TargetInstrInfo.h" 37 #include "llvm/Target/TargetMachine.h" 38 #include "llvm/Target/TargetOptions.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include "llvm/ADT/OwningPtr.h" 44 #include "llvm/ADT/SmallSet.h" 45 #include "llvm/ADT/Statistic.h" 46 #include "llvm/ADT/STLExtras.h" 47 #include <algorithm> 48 #include <cmath> 49 using namespace llvm; 50 51 STATISTIC(numJoins , "Number of interval joins performed"); 52 STATISTIC(numCrossRCs , "Number of cross class joins performed"); 53 STATISTIC(numCommutes , "Number of instruction commuting performed"); 54 STATISTIC(numExtends , "Number of copies extended"); 55 STATISTIC(NumReMats , "Number of instructions re-materialized"); 56 STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); 57 STATISTIC(numAborts , "Number of times interval joining aborted"); 58 STATISTIC(NumInflated , "Number of register classes inflated"); 59 60 static cl::opt<bool> 61 EnableJoining("join-liveintervals", 62 cl::desc("Coalesce copies (default=true)"), 63 cl::init(true)); 64 65 static cl::opt<bool> 66 DisableCrossClassJoin("disable-cross-class-join", 67 cl::desc("Avoid coalescing cross register class copies"), 68 cl::init(false), cl::Hidden); 69 70 static cl::opt<bool> 71 EnablePhysicalJoin("join-physregs", 72 cl::desc("Join physical register copies"), 73 cl::init(false), cl::Hidden); 74 75 static cl::opt<bool> 76 VerifyCoalescing("verify-coalescing", 77 cl::desc("Verify machine instrs before and after register coalescing"), 78 cl::Hidden); 79 80 namespace { 81 class RegisterCoalescer : public MachineFunctionPass { 82 MachineFunction* MF; 83 MachineRegisterInfo* MRI; 84 const TargetMachine* TM; 85 const TargetRegisterInfo* TRI; 86 const TargetInstrInfo* TII; 87 LiveIntervals *LIS; 88 LiveDebugVariables *LDV; 89 const MachineLoopInfo* Loops; 90 AliasAnalysis *AA; 91 RegisterClassInfo RegClassInfo; 92 93 /// JoinedCopies - Keep track of copies eliminated due to coalescing. 94 /// 95 SmallPtrSet<MachineInstr*, 32> JoinedCopies; 96 97 /// ReMatCopies - Keep track of copies eliminated due to remat. 98 /// 99 SmallPtrSet<MachineInstr*, 32> ReMatCopies; 100 101 /// ReMatDefs - Keep track of definition instructions which have 102 /// been remat'ed. 103 SmallPtrSet<MachineInstr*, 8> ReMatDefs; 104 105 /// joinIntervals - join compatible live intervals 106 void joinIntervals(); 107 108 /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting 109 /// copies that cannot yet be coalesced into the "TryAgain" list. 110 void CopyCoalesceInMBB(MachineBasicBlock *MBB, 111 std::vector<MachineInstr*> &TryAgain); 112 113 /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, 114 /// which are the src/dst of the copy instruction CopyMI. This returns 115 /// true if the copy was successfully coalesced away. If it is not 116 /// currently possible to coalesce this interval, but it may be possible if 117 /// other things get coalesced, then it returns true by reference in 118 /// 'Again'. 119 bool JoinCopy(MachineInstr *TheCopy, bool &Again); 120 121 /// JoinIntervals - Attempt to join these two intervals. On failure, this 122 /// returns false. The output "SrcInt" will not have been modified, so we 123 /// can use this information below to update aliases. 124 bool JoinIntervals(CoalescerPair &CP); 125 126 /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If 127 /// the source value number is defined by a copy from the destination reg 128 /// see if we can merge these two destination reg valno# into a single 129 /// value number, eliminating a copy. 130 bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); 131 132 /// HasOtherReachingDefs - Return true if there are definitions of IntB 133 /// other than BValNo val# that can reach uses of AValno val# of IntA. 134 bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, 135 VNInfo *AValNo, VNInfo *BValNo); 136 137 /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy. 138 /// If the source value number is defined by a commutable instruction and 139 /// its other operand is coalesced to the copy dest register, see if we 140 /// can transform the copy into a noop by commuting the definition. 141 bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); 142 143 /// ReMaterializeTrivialDef - If the source of a copy is defined by a 144 /// trivial computation, replace the copy by rematerialize the definition. 145 /// If PreserveSrcInt is true, make sure SrcInt is valid after the call. 146 bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt, 147 unsigned DstReg, MachineInstr *CopyMI); 148 149 /// shouldJoinPhys - Return true if a physreg copy should be joined. 150 bool shouldJoinPhys(CoalescerPair &CP); 151 152 /// isWinToJoinCrossClass - Return true if it's profitable to coalesce 153 /// two virtual registers from different register classes. 154 bool isWinToJoinCrossClass(unsigned SrcReg, 155 unsigned DstReg, 156 const TargetRegisterClass *SrcRC, 157 const TargetRegisterClass *DstRC, 158 const TargetRegisterClass *NewRC); 159 160 /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and 161 /// update the subregister number if it is not zero. If DstReg is a 162 /// physical register and the existing subregister number of the def / use 163 /// being updated is not zero, make sure to set it to the correct physical 164 /// subregister. 165 void UpdateRegDefsUses(const CoalescerPair &CP); 166 167 /// RemoveDeadDef - If a def of a live interval is now determined dead, 168 /// remove the val# it defines. If the live interval becomes empty, remove 169 /// it as well. 170 bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); 171 172 /// markAsJoined - Remember that CopyMI has already been joined. 173 void markAsJoined(MachineInstr *CopyMI); 174 175 /// eliminateUndefCopy - Handle copies of undef values. 176 bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP); 177 178 public: 179 static char ID; // Class identification, replacement for typeinfo 180 RegisterCoalescer() : MachineFunctionPass(ID) { 181 initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); 182 } 183 184 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 185 186 virtual void releaseMemory(); 187 188 /// runOnMachineFunction - pass entry point 189 virtual bool runOnMachineFunction(MachineFunction&); 190 191 /// print - Implement the dump method. 192 virtual void print(raw_ostream &O, const Module* = 0) const; 193 }; 194 } /// end anonymous namespace 195 196 char &llvm::RegisterCoalescerPassID = RegisterCoalescer::ID; 197 198 INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", 199 "Simple Register Coalescing", false, false) 200 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 201 INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) 202 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 203 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 204 INITIALIZE_PASS_DEPENDENCY(StrongPHIElimination) 205 INITIALIZE_PASS_DEPENDENCY(PHIElimination) 206 INITIALIZE_PASS_DEPENDENCY(TwoAddressInstructionPass) 207 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 208 INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", 209 "Simple Register Coalescing", false, false) 210 211 char RegisterCoalescer::ID = 0; 212 213 static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) { 214 if (!a) return b; 215 if (!b) return a; 216 return tri.composeSubRegIndices(a, b); 217 } 218 219 static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, 220 unsigned &Src, unsigned &Dst, 221 unsigned &SrcSub, unsigned &DstSub) { 222 if (MI->isCopy()) { 223 Dst = MI->getOperand(0).getReg(); 224 DstSub = MI->getOperand(0).getSubReg(); 225 Src = MI->getOperand(1).getReg(); 226 SrcSub = MI->getOperand(1).getSubReg(); 227 } else if (MI->isSubregToReg()) { 228 Dst = MI->getOperand(0).getReg(); 229 DstSub = compose(tri, MI->getOperand(0).getSubReg(), 230 MI->getOperand(3).getImm()); 231 Src = MI->getOperand(2).getReg(); 232 SrcSub = MI->getOperand(2).getSubReg(); 233 } else 234 return false; 235 return true; 236 } 237 238 bool CoalescerPair::setRegisters(const MachineInstr *MI) { 239 SrcReg = DstReg = SubIdx = 0; 240 NewRC = 0; 241 Flipped = CrossClass = false; 242 243 unsigned Src, Dst, SrcSub, DstSub; 244 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) 245 return false; 246 Partial = SrcSub || DstSub; 247 248 // If one register is a physreg, it must be Dst. 249 if (TargetRegisterInfo::isPhysicalRegister(Src)) { 250 if (TargetRegisterInfo::isPhysicalRegister(Dst)) 251 return false; 252 std::swap(Src, Dst); 253 std::swap(SrcSub, DstSub); 254 Flipped = true; 255 } 256 257 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 258 259 if (TargetRegisterInfo::isPhysicalRegister(Dst)) { 260 // Eliminate DstSub on a physreg. 261 if (DstSub) { 262 Dst = TRI.getSubReg(Dst, DstSub); 263 if (!Dst) return false; 264 DstSub = 0; 265 } 266 267 // Eliminate SrcSub by picking a corresponding Dst superregister. 268 if (SrcSub) { 269 Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); 270 if (!Dst) return false; 271 SrcSub = 0; 272 } else if (!MRI.getRegClass(Src)->contains(Dst)) { 273 return false; 274 } 275 } else { 276 // Both registers are virtual. 277 278 // Both registers have subreg indices. 279 if (SrcSub && DstSub) { 280 // For now we only handle the case of identical indices in commensurate 281 // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg 282 // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg. 283 if (SrcSub != DstSub) 284 return false; 285 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); 286 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 287 if (!TRI.getCommonSubClass(DstRC, SrcRC)) 288 return false; 289 SrcSub = DstSub = 0; 290 } 291 292 // There can be no SrcSub. 293 if (SrcSub) { 294 std::swap(Src, Dst); 295 DstSub = SrcSub; 296 SrcSub = 0; 297 assert(!Flipped && "Unexpected flip"); 298 Flipped = true; 299 } 300 301 // Find the new register class. 302 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); 303 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 304 if (DstSub) 305 NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); 306 else 307 NewRC = TRI.getCommonSubClass(DstRC, SrcRC); 308 if (!NewRC) 309 return false; 310 CrossClass = NewRC != DstRC || NewRC != SrcRC; 311 } 312 // Check our invariants 313 assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); 314 assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && 315 "Cannot have a physical SubIdx"); 316 SrcReg = Src; 317 DstReg = Dst; 318 SubIdx = DstSub; 319 return true; 320 } 321 322 bool CoalescerPair::flip() { 323 if (SubIdx || TargetRegisterInfo::isPhysicalRegister(DstReg)) 324 return false; 325 std::swap(SrcReg, DstReg); 326 Flipped = !Flipped; 327 return true; 328 } 329 330 bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { 331 if (!MI) 332 return false; 333 unsigned Src, Dst, SrcSub, DstSub; 334 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) 335 return false; 336 337 // Find the virtual register that is SrcReg. 338 if (Dst == SrcReg) { 339 std::swap(Src, Dst); 340 std::swap(SrcSub, DstSub); 341 } else if (Src != SrcReg) { 342 return false; 343 } 344 345 // Now check that Dst matches DstReg. 346 if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { 347 if (!TargetRegisterInfo::isPhysicalRegister(Dst)) 348 return false; 349 assert(!SubIdx && "Inconsistent CoalescerPair state."); 350 // DstSub could be set for a physreg from INSERT_SUBREG. 351 if (DstSub) 352 Dst = TRI.getSubReg(Dst, DstSub); 353 // Full copy of Src. 354 if (!SrcSub) 355 return DstReg == Dst; 356 // This is a partial register copy. Check that the parts match. 357 return TRI.getSubReg(DstReg, SrcSub) == Dst; 358 } else { 359 // DstReg is virtual. 360 if (DstReg != Dst) 361 return false; 362 // Registers match, do the subregisters line up? 363 return compose(TRI, SubIdx, SrcSub) == DstSub; 364 } 365 } 366 367 void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { 368 AU.setPreservesCFG(); 369 AU.addRequired<AliasAnalysis>(); 370 AU.addRequired<LiveIntervals>(); 371 AU.addPreserved<LiveIntervals>(); 372 AU.addRequired<LiveDebugVariables>(); 373 AU.addPreserved<LiveDebugVariables>(); 374 AU.addPreserved<SlotIndexes>(); 375 AU.addRequired<MachineLoopInfo>(); 376 AU.addPreserved<MachineLoopInfo>(); 377 AU.addPreservedID(MachineDominatorsID); 378 AU.addPreservedID(StrongPHIEliminationID); 379 AU.addPreservedID(PHIEliminationID); 380 AU.addPreservedID(TwoAddressInstructionPassID); 381 MachineFunctionPass::getAnalysisUsage(AU); 382 } 383 384 void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) { 385 /// Joined copies are not deleted immediately, but kept in JoinedCopies. 386 JoinedCopies.insert(CopyMI); 387 388 /// Mark all register operands of CopyMI as <undef> so they won't affect dead 389 /// code elimination. 390 for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), 391 E = CopyMI->operands_end(); I != E; ++I) 392 if (I->isReg()) 393 I->setIsUndef(true); 394 } 395 396 /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA 397 /// being the source and IntB being the dest, thus this defines a value number 398 /// in IntB. If the source value number (in IntA) is defined by a copy from B, 399 /// see if we can merge these two pieces of B into a single value number, 400 /// eliminating a copy. For example: 401 /// 402 /// A3 = B0 403 /// ... 404 /// B1 = A3 <- this copy 405 /// 406 /// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 407 /// value number to be replaced with B0 (which simplifies the B liveinterval). 408 /// 409 /// This returns true if an interval was modified. 410 /// 411 bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, 412 MachineInstr *CopyMI) { 413 // Bail if there is no dst interval - can happen when merging physical subreg 414 // operations. 415 if (!LIS->hasInterval(CP.getDstReg())) 416 return false; 417 418 LiveInterval &IntA = 419 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 420 LiveInterval &IntB = 421 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 422 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); 423 424 // BValNo is a value number in B that is defined by a copy from A. 'B3' in 425 // the example above. 426 LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); 427 if (BLR == IntB.end()) return false; 428 VNInfo *BValNo = BLR->valno; 429 430 // Get the location that B is defined at. Two options: either this value has 431 // an unknown definition point or it is defined at CopyIdx. If unknown, we 432 // can't process it. 433 if (BValNo->def != CopyIdx) return false; 434 435 // AValNo is the value number in A that defines the copy, A3 in the example. 436 SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); 437 LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); 438 // The live range might not exist after fun with physreg coalescing. 439 if (ALR == IntA.end()) return false; 440 VNInfo *AValNo = ALR->valno; 441 442 // If AValNo is defined as a copy from IntB, we can potentially process this. 443 // Get the instruction that defines this value number. 444 MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); 445 if (!CP.isCoalescable(ACopyMI)) 446 return false; 447 448 // Get the LiveRange in IntB that this value number starts with. 449 LiveInterval::iterator ValLR = 450 IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); 451 if (ValLR == IntB.end()) 452 return false; 453 454 // Make sure that the end of the live range is inside the same block as 455 // CopyMI. 456 MachineInstr *ValLREndInst = 457 LIS->getInstructionFromIndex(ValLR->end.getPrevSlot()); 458 if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) 459 return false; 460 461 // Okay, we now know that ValLR ends in the same block that the CopyMI 462 // live-range starts. If there are no intervening live ranges between them in 463 // IntB, we can merge them. 464 if (ValLR+1 != BLR) return false; 465 466 // If a live interval is a physical register, conservatively check if any 467 // of its aliases is overlapping the live interval of the virtual register. 468 // If so, do not coalesce. 469 if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { 470 for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) 471 if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) { 472 DEBUG({ 473 dbgs() << "\t\tInterfere with alias "; 474 LIS->getInterval(*AS).print(dbgs(), TRI); 475 }); 476 return false; 477 } 478 } 479 480 DEBUG({ 481 dbgs() << "Extending: "; 482 IntB.print(dbgs(), TRI); 483 }); 484 485 SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; 486 // We are about to delete CopyMI, so need to remove it as the 'instruction 487 // that defines this value #'. Update the valnum with the new defining 488 // instruction #. 489 BValNo->def = FillerStart; 490 491 // Okay, we can merge them. We need to insert a new liverange: 492 // [ValLR.end, BLR.begin) of either value number, then we merge the 493 // two value numbers. 494 IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); 495 496 // If the IntB live range is assigned to a physical register, and if that 497 // physreg has sub-registers, update their live intervals as well. 498 if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { 499 for (const unsigned *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) { 500 if (!LIS->hasInterval(*SR)) 501 continue; 502 LiveInterval &SRLI = LIS->getInterval(*SR); 503 SRLI.addRange(LiveRange(FillerStart, FillerEnd, 504 SRLI.getNextValue(FillerStart, 505 LIS->getVNInfoAllocator()))); 506 } 507 } 508 509 // Okay, merge "B1" into the same value number as "B0". 510 if (BValNo != ValLR->valno) { 511 // If B1 is killed by a PHI, then the merged live range must also be killed 512 // by the same PHI, as B0 and B1 can not overlap. 513 bool HasPHIKill = BValNo->hasPHIKill(); 514 IntB.MergeValueNumberInto(BValNo, ValLR->valno); 515 if (HasPHIKill) 516 ValLR->valno->setHasPHIKill(true); 517 } 518 DEBUG({ 519 dbgs() << " result = "; 520 IntB.print(dbgs(), TRI); 521 dbgs() << "\n"; 522 }); 523 524 // If the source instruction was killing the source register before the 525 // merge, unset the isKill marker given the live range has been extended. 526 int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); 527 if (UIdx != -1) { 528 ValLREndInst->getOperand(UIdx).setIsKill(false); 529 } 530 531 // Rewrite the copy. If the copy instruction was killing the destination 532 // register before the merge, find the last use and trim the live range. That 533 // will also add the isKill marker. 534 CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(), 535 *TRI); 536 if (ALR->end == CopyIdx) 537 LIS->shrinkToUses(&IntA); 538 539 ++numExtends; 540 return true; 541 } 542 543 /// HasOtherReachingDefs - Return true if there are definitions of IntB 544 /// other than BValNo val# that can reach uses of AValno val# of IntA. 545 bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA, 546 LiveInterval &IntB, 547 VNInfo *AValNo, 548 VNInfo *BValNo) { 549 for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); 550 AI != AE; ++AI) { 551 if (AI->valno != AValNo) continue; 552 LiveInterval::Ranges::iterator BI = 553 std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); 554 if (BI != IntB.ranges.begin()) 555 --BI; 556 for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { 557 if (BI->valno == BValNo) 558 continue; 559 if (BI->start <= AI->start && BI->end > AI->start) 560 return true; 561 if (BI->start > AI->start && BI->start < AI->end) 562 return true; 563 } 564 } 565 return false; 566 } 567 568 /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with 569 /// IntA being the source and IntB being the dest, thus this defines a value 570 /// number in IntB. If the source value number (in IntA) is defined by a 571 /// commutable instruction and its other operand is coalesced to the copy dest 572 /// register, see if we can transform the copy into a noop by commuting the 573 /// definition. For example, 574 /// 575 /// A3 = op A2 B0<kill> 576 /// ... 577 /// B1 = A3 <- this copy 578 /// ... 579 /// = op A3 <- more uses 580 /// 581 /// ==> 582 /// 583 /// B2 = op B0 A2<kill> 584 /// ... 585 /// B1 = B2 <- now an identify copy 586 /// ... 587 /// = op B2 <- more uses 588 /// 589 /// This returns true if an interval was modified. 590 /// 591 bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, 592 MachineInstr *CopyMI) { 593 // FIXME: For now, only eliminate the copy by commuting its def when the 594 // source register is a virtual register. We want to guard against cases 595 // where the copy is a back edge copy and commuting the def lengthen the 596 // live interval of the source register to the entire loop. 597 if (CP.isPhys() && CP.isFlipped()) 598 return false; 599 600 // Bail if there is no dst interval. 601 if (!LIS->hasInterval(CP.getDstReg())) 602 return false; 603 604 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); 605 606 LiveInterval &IntA = 607 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 608 LiveInterval &IntB = 609 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 610 611 // BValNo is a value number in B that is defined by a copy from A. 'B3' in 612 // the example above. 613 VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); 614 if (!BValNo || BValNo->def != CopyIdx) 615 return false; 616 617 assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); 618 619 // AValNo is the value number in A that defines the copy, A3 in the example. 620 VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); 621 assert(AValNo && "COPY source not live"); 622 623 // If other defs can reach uses of this def, then it's not safe to perform 624 // the optimization. 625 if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) 626 return false; 627 MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); 628 if (!DefMI) 629 return false; 630 if (!DefMI->isCommutable()) 631 return false; 632 // If DefMI is a two-address instruction then commuting it will change the 633 // destination register. 634 int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); 635 assert(DefIdx != -1); 636 unsigned UseOpIdx; 637 if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) 638 return false; 639 unsigned Op1, Op2, NewDstIdx; 640 if (!TII->findCommutedOpIndices(DefMI, Op1, Op2)) 641 return false; 642 if (Op1 == UseOpIdx) 643 NewDstIdx = Op2; 644 else if (Op2 == UseOpIdx) 645 NewDstIdx = Op1; 646 else 647 return false; 648 649 MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); 650 unsigned NewReg = NewDstMO.getReg(); 651 if (NewReg != IntB.reg || !NewDstMO.isKill()) 652 return false; 653 654 // Make sure there are no other definitions of IntB that would reach the 655 // uses which the new definition can reach. 656 if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) 657 return false; 658 659 // Abort if the aliases of IntB.reg have values that are not simply the 660 // clobbers from the superreg. 661 if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) 662 for (const unsigned *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) 663 if (LIS->hasInterval(*AS) && 664 HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0)) 665 return false; 666 667 // If some of the uses of IntA.reg is already coalesced away, return false. 668 // It's not possible to determine whether it's safe to perform the coalescing. 669 for (MachineRegisterInfo::use_nodbg_iterator UI = 670 MRI->use_nodbg_begin(IntA.reg), 671 UE = MRI->use_nodbg_end(); UI != UE; ++UI) { 672 MachineInstr *UseMI = &*UI; 673 SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); 674 LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); 675 if (ULR == IntA.end()) 676 continue; 677 if (ULR->valno == AValNo && JoinedCopies.count(UseMI)) 678 return false; 679 } 680 681 DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' 682 << *DefMI); 683 684 // At this point we have decided that it is legal to do this 685 // transformation. Start by commuting the instruction. 686 MachineBasicBlock *MBB = DefMI->getParent(); 687 MachineInstr *NewMI = TII->commuteInstruction(DefMI); 688 if (!NewMI) 689 return false; 690 if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && 691 TargetRegisterInfo::isVirtualRegister(IntB.reg) && 692 !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) 693 return false; 694 if (NewMI != DefMI) { 695 LIS->ReplaceMachineInstrInMaps(DefMI, NewMI); 696 MachineBasicBlock::iterator Pos = DefMI; 697 MBB->insert(Pos, NewMI); 698 MBB->erase(DefMI); 699 } 700 unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); 701 NewMI->getOperand(OpIdx).setIsKill(); 702 703 // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. 704 // A = or A, B 705 // ... 706 // B = A 707 // ... 708 // C = A<kill> 709 // ... 710 // = B 711 712 // Update uses of IntA of the specific Val# with IntB. 713 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), 714 UE = MRI->use_end(); UI != UE;) { 715 MachineOperand &UseMO = UI.getOperand(); 716 MachineInstr *UseMI = &*UI; 717 ++UI; 718 if (JoinedCopies.count(UseMI)) 719 continue; 720 if (UseMI->isDebugValue()) { 721 // FIXME These don't have an instruction index. Not clear we have enough 722 // info to decide whether to do this replacement or not. For now do it. 723 UseMO.setReg(NewReg); 724 continue; 725 } 726 SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); 727 LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); 728 if (ULR == IntA.end() || ULR->valno != AValNo) 729 continue; 730 if (TargetRegisterInfo::isPhysicalRegister(NewReg)) 731 UseMO.substPhysReg(NewReg, *TRI); 732 else 733 UseMO.setReg(NewReg); 734 if (UseMI == CopyMI) 735 continue; 736 if (!UseMI->isCopy()) 737 continue; 738 if (UseMI->getOperand(0).getReg() != IntB.reg || 739 UseMI->getOperand(0).getSubReg()) 740 continue; 741 742 // This copy will become a noop. If it's defining a new val#, merge it into 743 // BValNo. 744 SlotIndex DefIdx = UseIdx.getRegSlot(); 745 VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); 746 if (!DVNI) 747 continue; 748 DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); 749 assert(DVNI->def == DefIdx); 750 BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); 751 markAsJoined(UseMI); 752 } 753 754 // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition 755 // is updated. 756 VNInfo *ValNo = BValNo; 757 ValNo->def = AValNo->def; 758 for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); 759 AI != AE; ++AI) { 760 if (AI->valno != AValNo) continue; 761 IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); 762 } 763 DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); 764 765 IntA.removeValNo(AValNo); 766 DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); 767 ++numCommutes; 768 return true; 769 } 770 771 /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial 772 /// computation, replace the copy by rematerialize the definition. 773 bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, 774 bool preserveSrcInt, 775 unsigned DstReg, 776 MachineInstr *CopyMI) { 777 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); 778 LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); 779 assert(SrcLR != SrcInt.end() && "Live range not found!"); 780 VNInfo *ValNo = SrcLR->valno; 781 if (ValNo->isPHIDef() || ValNo->isUnused()) 782 return false; 783 MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); 784 if (!DefMI) 785 return false; 786 assert(DefMI && "Defining instruction disappeared"); 787 if (!DefMI->isAsCheapAsAMove()) 788 return false; 789 if (!TII->isTriviallyReMaterializable(DefMI, AA)) 790 return false; 791 bool SawStore = false; 792 if (!DefMI->isSafeToMove(TII, AA, SawStore)) 793 return false; 794 const MCInstrDesc &MCID = DefMI->getDesc(); 795 if (MCID.getNumDefs() != 1) 796 return false; 797 if (!DefMI->isImplicitDef()) { 798 // Make sure the copy destination register class fits the instruction 799 // definition register class. The mismatch can happen as a result of earlier 800 // extract_subreg, insert_subreg, subreg_to_reg coalescing. 801 const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI); 802 if (TargetRegisterInfo::isVirtualRegister(DstReg)) { 803 if (MRI->getRegClass(DstReg) != RC) 804 return false; 805 } else if (!RC->contains(DstReg)) 806 return false; 807 } 808 809 MachineBasicBlock *MBB = CopyMI->getParent(); 810 MachineBasicBlock::iterator MII = 811 llvm::next(MachineBasicBlock::iterator(CopyMI)); 812 TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); 813 MachineInstr *NewMI = prior(MII); 814 815 // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). 816 // We need to remember these so we can add intervals once we insert 817 // NewMI into SlotIndexes. 818 SmallVector<unsigned, 4> NewMIImplDefs; 819 for (unsigned i = NewMI->getDesc().getNumOperands(), 820 e = NewMI->getNumOperands(); i != e; ++i) { 821 MachineOperand &MO = NewMI->getOperand(i); 822 if (MO.isReg()) { 823 assert(MO.isDef() && MO.isImplicit() && MO.isDead()); 824 NewMIImplDefs.push_back(MO.getReg()); 825 } 826 } 827 828 // CopyMI may have implicit operands, transfer them over to the newly 829 // rematerialized instruction. And update implicit def interval valnos. 830 for (unsigned i = CopyMI->getDesc().getNumOperands(), 831 e = CopyMI->getNumOperands(); i != e; ++i) { 832 MachineOperand &MO = CopyMI->getOperand(i); 833 if (MO.isReg() && MO.isImplicit()) 834 NewMI->addOperand(MO); 835 } 836 837 LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); 838 839 SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); 840 for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { 841 unsigned reg = NewMIImplDefs[i]; 842 LiveInterval &li = LIS->getInterval(reg); 843 VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(), 844 LIS->getVNInfoAllocator()); 845 LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN); 846 li.addRange(lr); 847 } 848 849 NewMI->copyImplicitOps(CopyMI); 850 CopyMI->eraseFromParent(); 851 ReMatCopies.insert(CopyMI); 852 ReMatDefs.insert(DefMI); 853 DEBUG(dbgs() << "Remat: " << *NewMI); 854 ++NumReMats; 855 856 // The source interval can become smaller because we removed a use. 857 if (preserveSrcInt) 858 LIS->shrinkToUses(&SrcInt); 859 860 return true; 861 } 862 863 /// eliminateUndefCopy - ProcessImpicitDefs may leave some copies of <undef> 864 /// values, it only removes local variables. When we have a copy like: 865 /// 866 /// %vreg1 = COPY %vreg2<undef> 867 /// 868 /// We delete the copy and remove the corresponding value number from %vreg1. 869 /// Any uses of that value number are marked as <undef>. 870 bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, 871 const CoalescerPair &CP) { 872 SlotIndex Idx = LIS->getInstructionIndex(CopyMI); 873 LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg()); 874 if (SrcInt->liveAt(Idx)) 875 return false; 876 LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg()); 877 if (DstInt->liveAt(Idx)) 878 return false; 879 880 // No intervals are live-in to CopyMI - it is undef. 881 if (CP.isFlipped()) 882 DstInt = SrcInt; 883 SrcInt = 0; 884 885 VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot()); 886 assert(DeadVNI && "No value defined in DstInt"); 887 DstInt->removeValNo(DeadVNI); 888 889 // Find new undef uses. 890 for (MachineRegisterInfo::reg_nodbg_iterator 891 I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end(); 892 I != E; ++I) { 893 MachineOperand &MO = I.getOperand(); 894 if (MO.isDef() || MO.isUndef()) 895 continue; 896 MachineInstr *MI = MO.getParent(); 897 SlotIndex Idx = LIS->getInstructionIndex(MI); 898 if (DstInt->liveAt(Idx)) 899 continue; 900 MO.setIsUndef(true); 901 DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI); 902 } 903 return true; 904 } 905 906 /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and 907 /// update the subregister number if it is not zero. If DstReg is a 908 /// physical register and the existing subregister number of the def / use 909 /// being updated is not zero, make sure to set it to the correct physical 910 /// subregister. 911 void 912 RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { 913 bool DstIsPhys = CP.isPhys(); 914 unsigned SrcReg = CP.getSrcReg(); 915 unsigned DstReg = CP.getDstReg(); 916 unsigned SubIdx = CP.getSubIdx(); 917 918 // Update LiveDebugVariables. 919 LDV->renameRegister(SrcReg, DstReg, SubIdx); 920 921 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); 922 MachineInstr *UseMI = I.skipInstruction();) { 923 // A PhysReg copy that won't be coalesced can perhaps be rematerialized 924 // instead. 925 if (DstIsPhys) { 926 if (UseMI->isFullCopy() && 927 UseMI->getOperand(1).getReg() == SrcReg && 928 UseMI->getOperand(0).getReg() != SrcReg && 929 UseMI->getOperand(0).getReg() != DstReg && 930 !JoinedCopies.count(UseMI) && 931 ReMaterializeTrivialDef(LIS->getInterval(SrcReg), false, 932 UseMI->getOperand(0).getReg(), UseMI)) 933 continue; 934 } 935 936 SmallVector<unsigned,8> Ops; 937 bool Reads, Writes; 938 tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); 939 bool Kills = false, Deads = false; 940 941 // Replace SrcReg with DstReg in all UseMI operands. 942 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 943 MachineOperand &MO = UseMI->getOperand(Ops[i]); 944 Kills |= MO.isKill(); 945 Deads |= MO.isDead(); 946 947 // Make sure we don't create read-modify-write defs accidentally. We 948 // assume here that a SrcReg def cannot be joined into a live DstReg. If 949 // RegisterCoalescer starts tracking partially live registers, we will 950 // need to check the actual LiveInterval to determine if DstReg is live 951 // here. 952 if (SubIdx && !Reads) 953 MO.setIsUndef(); 954 955 if (DstIsPhys) 956 MO.substPhysReg(DstReg, *TRI); 957 else 958 MO.substVirtReg(DstReg, SubIdx, *TRI); 959 } 960 961 // This instruction is a copy that will be removed. 962 if (JoinedCopies.count(UseMI)) 963 continue; 964 965 if (SubIdx) { 966 // If UseMI was a simple SrcReg def, make sure we didn't turn it into a 967 // read-modify-write of DstReg. 968 if (Deads) 969 UseMI->addRegisterDead(DstReg, TRI); 970 else if (!Reads && Writes) 971 UseMI->addRegisterDefined(DstReg, TRI); 972 973 // Kill flags apply to the whole physical register. 974 if (DstIsPhys && Kills) 975 UseMI->addRegisterKilled(DstReg, TRI); 976 } 977 978 DEBUG({ 979 dbgs() << "\t\tupdated: "; 980 if (!UseMI->isDebugValue()) 981 dbgs() << LIS->getInstructionIndex(UseMI) << "\t"; 982 dbgs() << *UseMI; 983 }); 984 } 985 } 986 987 /// removeIntervalIfEmpty - Check if the live interval of a physical register 988 /// is empty, if so remove it and also remove the empty intervals of its 989 /// sub-registers. Return true if live interval is removed. 990 static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, 991 const TargetRegisterInfo *TRI) { 992 if (li.empty()) { 993 if (TargetRegisterInfo::isPhysicalRegister(li.reg)) 994 for (const unsigned* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) { 995 if (!LIS->hasInterval(*SR)) 996 continue; 997 LiveInterval &sli = LIS->getInterval(*SR); 998 if (sli.empty()) 999 LIS->removeInterval(*SR); 1000 } 1001 LIS->removeInterval(li.reg); 1002 return true; 1003 } 1004 return false; 1005 } 1006 1007 /// RemoveDeadDef - If a def of a live interval is now determined dead, remove 1008 /// the val# it defines. If the live interval becomes empty, remove it as well. 1009 bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, 1010 MachineInstr *DefMI) { 1011 SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot(); 1012 LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); 1013 if (DefIdx != MLR->valno->def) 1014 return false; 1015 li.removeValNo(MLR->valno); 1016 return removeIntervalIfEmpty(li, LIS, TRI); 1017 } 1018 1019 /// shouldJoinPhys - Return true if a copy involving a physreg should be joined. 1020 /// We need to be careful about coalescing a source physical register with a 1021 /// virtual register. Once the coalescing is done, it cannot be broken and these 1022 /// are not spillable! If the destination interval uses are far away, think 1023 /// twice about coalescing them! 1024 bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) { 1025 bool Allocatable = LIS->isAllocatable(CP.getDstReg()); 1026 LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); 1027 1028 /// Always join simple intervals that are defined by a single copy from a 1029 /// reserved register. This doesn't increase register pressure, so it is 1030 /// always beneficial. 1031 if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) 1032 return true; 1033 1034 if (!EnablePhysicalJoin) { 1035 DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); 1036 return false; 1037 } 1038 1039 // Only coalesce to allocatable physreg, we don't want to risk modifying 1040 // reserved registers. 1041 if (!Allocatable) { 1042 DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); 1043 return false; // Not coalescable. 1044 } 1045 1046 // Don't join with physregs that have a ridiculous number of live 1047 // ranges. The data structure performance is really bad when that 1048 // happens. 1049 if (LIS->hasInterval(CP.getDstReg()) && 1050 LIS->getInterval(CP.getDstReg()).ranges.size() > 1000) { 1051 ++numAborts; 1052 DEBUG(dbgs() 1053 << "\tPhysical register live interval too complicated, abort!\n"); 1054 return false; 1055 } 1056 1057 // FIXME: Why are we skipping this test for partial copies? 1058 // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. 1059 if (!CP.isPartial()) { 1060 const TargetRegisterClass *RC = MRI->getRegClass(CP.getSrcReg()); 1061 unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2; 1062 unsigned Length = LIS->getApproximateInstructionCount(JoinVInt); 1063 if (Length > Threshold) { 1064 ++numAborts; 1065 DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); 1066 return false; 1067 } 1068 } 1069 return true; 1070 } 1071 1072 /// isWinToJoinCrossClass - Return true if it's profitable to coalesce 1073 /// two virtual registers from different register classes. 1074 bool 1075 RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg, 1076 unsigned DstReg, 1077 const TargetRegisterClass *SrcRC, 1078 const TargetRegisterClass *DstRC, 1079 const TargetRegisterClass *NewRC) { 1080 unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC); 1081 // This heuristics is good enough in practice, but it's obviously not *right*. 1082 // 4 is a magic number that works well enough for x86, ARM, etc. It filter 1083 // out all but the most restrictive register classes. 1084 if (NewRCCount > 4 || 1085 // Early exit if the function is fairly small, coalesce aggressively if 1086 // that's the case. For really special register classes with 3 or 1087 // fewer registers, be a bit more careful. 1088 (LIS->getFuncInstructionCount() / NewRCCount) < 8) 1089 return true; 1090 LiveInterval &SrcInt = LIS->getInterval(SrcReg); 1091 LiveInterval &DstInt = LIS->getInterval(DstReg); 1092 unsigned SrcSize = LIS->getApproximateInstructionCount(SrcInt); 1093 unsigned DstSize = LIS->getApproximateInstructionCount(DstInt); 1094 1095 // Coalesce aggressively if the intervals are small compared to the number of 1096 // registers in the new class. The number 4 is fairly arbitrary, chosen to be 1097 // less aggressive than the 8 used for the whole function size. 1098 const unsigned ThresSize = 4 * NewRCCount; 1099 if (SrcSize <= ThresSize && DstSize <= ThresSize) 1100 return true; 1101 1102 // Estimate *register use density*. If it doubles or more, abort. 1103 unsigned SrcUses = std::distance(MRI->use_nodbg_begin(SrcReg), 1104 MRI->use_nodbg_end()); 1105 unsigned DstUses = std::distance(MRI->use_nodbg_begin(DstReg), 1106 MRI->use_nodbg_end()); 1107 unsigned NewUses = SrcUses + DstUses; 1108 unsigned NewSize = SrcSize + DstSize; 1109 if (SrcRC != NewRC && SrcSize > ThresSize) { 1110 unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC); 1111 if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) 1112 return false; 1113 } 1114 if (DstRC != NewRC && DstSize > ThresSize) { 1115 unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC); 1116 if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) 1117 return false; 1118 } 1119 return true; 1120 } 1121 1122 1123 /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, 1124 /// which are the src/dst of the copy instruction CopyMI. This returns true 1125 /// if the copy was successfully coalesced away. If it is not currently 1126 /// possible to coalesce this interval, but it may be possible if other 1127 /// things get coalesced, then it returns true by reference in 'Again'. 1128 bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { 1129 1130 Again = false; 1131 if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) 1132 return false; // Already done. 1133 1134 DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI); 1135 1136 CoalescerPair CP(*TII, *TRI); 1137 if (!CP.setRegisters(CopyMI)) { 1138 DEBUG(dbgs() << "\tNot coalescable.\n"); 1139 return false; 1140 } 1141 1142 // If they are already joined we continue. 1143 if (CP.getSrcReg() == CP.getDstReg()) { 1144 markAsJoined(CopyMI); 1145 DEBUG(dbgs() << "\tCopy already coalesced.\n"); 1146 return false; // Not coalescable. 1147 } 1148 1149 // Eliminate undefs. 1150 if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) { 1151 markAsJoined(CopyMI); 1152 DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n"); 1153 return false; // Not coalescable. 1154 } 1155 1156 DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) 1157 << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSubIdx()) 1158 << "\n"); 1159 1160 // Enforce policies. 1161 if (CP.isPhys()) { 1162 if (!shouldJoinPhys(CP)) { 1163 // Before giving up coalescing, if definition of source is defined by 1164 // trivial computation, try rematerializing it. 1165 if (!CP.isFlipped() && 1166 ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true, 1167 CP.getDstReg(), CopyMI)) 1168 return true; 1169 return false; 1170 } 1171 } else { 1172 // Avoid constraining virtual register regclass too much. 1173 if (CP.isCrossClass()) { 1174 DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); 1175 if (DisableCrossClassJoin) { 1176 DEBUG(dbgs() << "\tCross-class joins disabled.\n"); 1177 return false; 1178 } 1179 if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), 1180 MRI->getRegClass(CP.getSrcReg()), 1181 MRI->getRegClass(CP.getDstReg()), 1182 CP.getNewRC())) { 1183 DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); 1184 Again = true; // May be possible to coalesce later. 1185 return false; 1186 } 1187 } 1188 1189 // When possible, let DstReg be the larger interval. 1190 if (!CP.getSubIdx() && LIS->getInterval(CP.getSrcReg()).ranges.size() > 1191 LIS->getInterval(CP.getDstReg()).ranges.size()) 1192 CP.flip(); 1193 } 1194 1195 // Okay, attempt to join these two intervals. On failure, this returns false. 1196 // Otherwise, if one of the intervals being joined is a physreg, this method 1197 // always canonicalizes DstInt to be it. The output "SrcInt" will not have 1198 // been modified, so we can use this information below to update aliases. 1199 if (!JoinIntervals(CP)) { 1200 // Coalescing failed. 1201 1202 // If definition of source is defined by trivial computation, try 1203 // rematerializing it. 1204 if (!CP.isFlipped() && 1205 ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true, 1206 CP.getDstReg(), CopyMI)) 1207 return true; 1208 1209 // If we can eliminate the copy without merging the live ranges, do so now. 1210 if (!CP.isPartial()) { 1211 if (AdjustCopiesBackFrom(CP, CopyMI) || 1212 RemoveCopyByCommutingDef(CP, CopyMI)) { 1213 markAsJoined(CopyMI); 1214 DEBUG(dbgs() << "\tTrivial!\n"); 1215 return true; 1216 } 1217 } 1218 1219 // Otherwise, we are unable to join the intervals. 1220 DEBUG(dbgs() << "\tInterference!\n"); 1221 Again = true; // May be possible to coalesce later. 1222 return false; 1223 } 1224 1225 // Coalescing to a virtual register that is of a sub-register class of the 1226 // other. Make sure the resulting register is set to the right register class. 1227 if (CP.isCrossClass()) { 1228 ++numCrossRCs; 1229 MRI->setRegClass(CP.getDstReg(), CP.getNewRC()); 1230 } 1231 1232 // Remember to delete the copy instruction. 1233 markAsJoined(CopyMI); 1234 1235 UpdateRegDefsUses(CP); 1236 1237 // If we have extended the live range of a physical register, make sure we 1238 // update live-in lists as well. 1239 if (CP.isPhys()) { 1240 SmallVector<MachineBasicBlock*, 16> BlockSeq; 1241 // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the 1242 // ranges for this, and they are preserved. 1243 LiveInterval &SrcInt = LIS->getInterval(CP.getSrcReg()); 1244 for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end(); 1245 I != E; ++I ) { 1246 LIS->findLiveInMBBs(I->start, I->end, BlockSeq); 1247 for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { 1248 MachineBasicBlock &block = *BlockSeq[idx]; 1249 if (!block.isLiveIn(CP.getDstReg())) 1250 block.addLiveIn(CP.getDstReg()); 1251 } 1252 BlockSeq.clear(); 1253 } 1254 } 1255 1256 // SrcReg is guaranteed to be the register whose live interval that is 1257 // being merged. 1258 LIS->removeInterval(CP.getSrcReg()); 1259 1260 // Update regalloc hint. 1261 TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); 1262 1263 DEBUG({ 1264 LiveInterval &DstInt = LIS->getInterval(CP.getDstReg()); 1265 dbgs() << "\tJoined. Result = "; 1266 DstInt.print(dbgs(), TRI); 1267 dbgs() << "\n"; 1268 }); 1269 1270 ++numJoins; 1271 return true; 1272 } 1273 1274 /// ComputeUltimateVN - Assuming we are going to join two live intervals, 1275 /// compute what the resultant value numbers for each value in the input two 1276 /// ranges will be. This is complicated by copies between the two which can 1277 /// and will commonly cause multiple value numbers to be merged into one. 1278 /// 1279 /// VN is the value number that we're trying to resolve. InstDefiningValue 1280 /// keeps track of the new InstDefiningValue assignment for the result 1281 /// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of 1282 /// whether a value in this or other is a copy from the opposite set. 1283 /// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have 1284 /// already been assigned. 1285 /// 1286 /// ThisFromOther[x] - If x is defined as a copy from the other interval, this 1287 /// contains the value number the copy is from. 1288 /// 1289 static unsigned ComputeUltimateVN(VNInfo *VNI, 1290 SmallVector<VNInfo*, 16> &NewVNInfo, 1291 DenseMap<VNInfo*, VNInfo*> &ThisFromOther, 1292 DenseMap<VNInfo*, VNInfo*> &OtherFromThis, 1293 SmallVector<int, 16> &ThisValNoAssignments, 1294 SmallVector<int, 16> &OtherValNoAssignments) { 1295 unsigned VN = VNI->id; 1296 1297 // If the VN has already been computed, just return it. 1298 if (ThisValNoAssignments[VN] >= 0) 1299 return ThisValNoAssignments[VN]; 1300 assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); 1301 1302 // If this val is not a copy from the other val, then it must be a new value 1303 // number in the destination. 1304 DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI); 1305 if (I == ThisFromOther.end()) { 1306 NewVNInfo.push_back(VNI); 1307 return ThisValNoAssignments[VN] = NewVNInfo.size()-1; 1308 } 1309 VNInfo *OtherValNo = I->second; 1310 1311 // Otherwise, this *is* a copy from the RHS. If the other side has already 1312 // been computed, return it. 1313 if (OtherValNoAssignments[OtherValNo->id] >= 0) 1314 return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; 1315 1316 // Mark this value number as currently being computed, then ask what the 1317 // ultimate value # of the other value is. 1318 ThisValNoAssignments[VN] = -2; 1319 unsigned UltimateVN = 1320 ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther, 1321 OtherValNoAssignments, ThisValNoAssignments); 1322 return ThisValNoAssignments[VN] = UltimateVN; 1323 } 1324 1325 1326 // Find out if we have something like 1327 // A = X 1328 // B = X 1329 // if so, we can pretend this is actually 1330 // A = X 1331 // B = A 1332 // which allows us to coalesce A and B. 1333 // VNI is the definition of B. LR is the life range of A that includes 1334 // the slot just before B. If we return true, we add "B = X" to DupCopies. 1335 // This implies that A dominates B. 1336 static bool RegistersDefinedFromSameValue(LiveIntervals &li, 1337 const TargetRegisterInfo &tri, 1338 CoalescerPair &CP, 1339 VNInfo *VNI, 1340 LiveRange *LR, 1341 SmallVector<MachineInstr*, 8> &DupCopies) { 1342 // FIXME: This is very conservative. For example, we don't handle 1343 // physical registers. 1344 1345 MachineInstr *MI = li.getInstructionFromIndex(VNI->def); 1346 1347 if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys()) 1348 return false; 1349 1350 unsigned Dst = MI->getOperand(0).getReg(); 1351 unsigned Src = MI->getOperand(1).getReg(); 1352 1353 if (!TargetRegisterInfo::isVirtualRegister(Src) || 1354 !TargetRegisterInfo::isVirtualRegister(Dst)) 1355 return false; 1356 1357 unsigned A = CP.getDstReg(); 1358 unsigned B = CP.getSrcReg(); 1359 1360 if (B == Dst) 1361 std::swap(A, B); 1362 assert(Dst == A); 1363 1364 VNInfo *Other = LR->valno; 1365 const MachineInstr *OtherMI = li.getInstructionFromIndex(Other->def); 1366 1367 if (!OtherMI || !OtherMI->isFullCopy()) 1368 return false; 1369 1370 unsigned OtherDst = OtherMI->getOperand(0).getReg(); 1371 unsigned OtherSrc = OtherMI->getOperand(1).getReg(); 1372 1373 if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) || 1374 !TargetRegisterInfo::isVirtualRegister(OtherDst)) 1375 return false; 1376 1377 assert(OtherDst == B); 1378 1379 if (Src != OtherSrc) 1380 return false; 1381 1382 // If the copies use two different value numbers of X, we cannot merge 1383 // A and B. 1384 LiveInterval &SrcInt = li.getInterval(Src); 1385 // getVNInfoBefore returns NULL for undef copies. In this case, the 1386 // optimization is still safe. 1387 if (SrcInt.getVNInfoBefore(Other->def) != SrcInt.getVNInfoBefore(VNI->def)) 1388 return false; 1389 1390 DupCopies.push_back(MI); 1391 1392 return true; 1393 } 1394 1395 /// JoinIntervals - Attempt to join these two intervals. On failure, this 1396 /// returns false. 1397 bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { 1398 LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); 1399 DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), TRI); dbgs() << "\n"; }); 1400 1401 // If a live interval is a physical register, check for interference with any 1402 // aliases. The interference check implemented here is a bit more conservative 1403 // than the full interfeence check below. We allow overlapping live ranges 1404 // only when one is a copy of the other. 1405 if (CP.isPhys()) { 1406 // Optimization for reserved registers like ESP. 1407 // We can only merge with a reserved physreg if RHS has a single value that 1408 // is a copy of CP.DstReg(). The live range of the reserved register will 1409 // look like a set of dead defs - we don't properly track the live range of 1410 // reserved registers. 1411 if (RegClassInfo.isReserved(CP.getDstReg())) { 1412 assert(CP.isFlipped() && RHS.containsOneValue() && 1413 "Invalid join with reserved register"); 1414 // Deny any overlapping intervals. This depends on all the reserved 1415 // register live ranges to look like dead defs. 1416 for (const unsigned *AS = TRI->getOverlaps(CP.getDstReg()); *AS; ++AS) { 1417 if (!LIS->hasInterval(*AS)) 1418 continue; 1419 if (RHS.overlaps(LIS->getInterval(*AS))) { 1420 DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n'); 1421 return false; 1422 } 1423 } 1424 // Skip any value computations, we are not adding new values to the 1425 // reserved register. Also skip merging the live ranges, the reserved 1426 // register live range doesn't need to be accurate as long as all the 1427 // defs are there. 1428 return true; 1429 } 1430 1431 for (const unsigned *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){ 1432 if (!LIS->hasInterval(*AS)) 1433 continue; 1434 const LiveInterval &LHS = LIS->getInterval(*AS); 1435 LiveInterval::const_iterator LI = LHS.begin(); 1436 for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end(); 1437 RI != RE; ++RI) { 1438 LI = std::lower_bound(LI, LHS.end(), RI->start); 1439 // Does LHS have an overlapping live range starting before RI? 1440 if ((LI != LHS.begin() && LI[-1].end > RI->start) && 1441 (RI->start != RI->valno->def || 1442 !CP.isCoalescable(LIS->getInstructionFromIndex(RI->start)))) { 1443 DEBUG({ 1444 dbgs() << "\t\tInterference from alias: "; 1445 LHS.print(dbgs(), TRI); 1446 dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n"; 1447 }); 1448 return false; 1449 } 1450 1451 // Check that LHS ranges beginning in this range are copies. 1452 for (; LI != LHS.end() && LI->start < RI->end; ++LI) { 1453 if (LI->start != LI->valno->def || 1454 !CP.isCoalescable(LIS->getInstructionFromIndex(LI->start))) { 1455 DEBUG({ 1456 dbgs() << "\t\tInterference from alias: "; 1457 LHS.print(dbgs(), TRI); 1458 dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n"; 1459 }); 1460 return false; 1461 } 1462 } 1463 } 1464 } 1465 } 1466 1467 // Compute the final value assignment, assuming that the live ranges can be 1468 // coalesced. 1469 SmallVector<int, 16> LHSValNoAssignments; 1470 SmallVector<int, 16> RHSValNoAssignments; 1471 DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS; 1472 DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS; 1473 SmallVector<VNInfo*, 16> NewVNInfo; 1474 1475 SmallVector<MachineInstr*, 8> DupCopies; 1476 1477 LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg()); 1478 DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), TRI); dbgs() << "\n"; }); 1479 1480 // Loop over the value numbers of the LHS, seeing if any are defined from 1481 // the RHS. 1482 for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); 1483 i != e; ++i) { 1484 VNInfo *VNI = *i; 1485 if (VNI->isUnused() || VNI->isPHIDef()) 1486 continue; 1487 MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); 1488 assert(MI && "Missing def"); 1489 if (!MI->isCopyLike()) // Src not defined by a copy? 1490 continue; 1491 1492 // Figure out the value # from the RHS. 1493 LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); 1494 // The copy could be to an aliased physreg. 1495 if (!lr) continue; 1496 1497 // DstReg is known to be a register in the LHS interval. If the src is 1498 // from the RHS interval, we can use its value #. 1499 if (!CP.isCoalescable(MI) && 1500 !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) 1501 continue; 1502 1503 LHSValsDefinedFromRHS[VNI] = lr->valno; 1504 } 1505 1506 // Loop over the value numbers of the RHS, seeing if any are defined from 1507 // the LHS. 1508 for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); 1509 i != e; ++i) { 1510 VNInfo *VNI = *i; 1511 if (VNI->isUnused() || VNI->isPHIDef()) 1512 continue; 1513 MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); 1514 assert(MI && "Missing def"); 1515 if (!MI->isCopyLike()) // Src not defined by a copy? 1516 continue; 1517 1518 // Figure out the value # from the LHS. 1519 LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); 1520 // The copy could be to an aliased physreg. 1521 if (!lr) continue; 1522 1523 // DstReg is known to be a register in the RHS interval. If the src is 1524 // from the LHS interval, we can use its value #. 1525 if (!CP.isCoalescable(MI) && 1526 !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) 1527 continue; 1528 1529 RHSValsDefinedFromLHS[VNI] = lr->valno; 1530 } 1531 1532 LHSValNoAssignments.resize(LHS.getNumValNums(), -1); 1533 RHSValNoAssignments.resize(RHS.getNumValNums(), -1); 1534 NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); 1535 1536 for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); 1537 i != e; ++i) { 1538 VNInfo *VNI = *i; 1539 unsigned VN = VNI->id; 1540 if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) 1541 continue; 1542 ComputeUltimateVN(VNI, NewVNInfo, 1543 LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, 1544 LHSValNoAssignments, RHSValNoAssignments); 1545 } 1546 for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); 1547 i != e; ++i) { 1548 VNInfo *VNI = *i; 1549 unsigned VN = VNI->id; 1550 if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) 1551 continue; 1552 // If this value number isn't a copy from the LHS, it's a new number. 1553 if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { 1554 NewVNInfo.push_back(VNI); 1555 RHSValNoAssignments[VN] = NewVNInfo.size()-1; 1556 continue; 1557 } 1558 1559 ComputeUltimateVN(VNI, NewVNInfo, 1560 RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, 1561 RHSValNoAssignments, LHSValNoAssignments); 1562 } 1563 1564 // Armed with the mappings of LHS/RHS values to ultimate values, walk the 1565 // interval lists to see if these intervals are coalescable. 1566 LiveInterval::const_iterator I = LHS.begin(); 1567 LiveInterval::const_iterator IE = LHS.end(); 1568 LiveInterval::const_iterator J = RHS.begin(); 1569 LiveInterval::const_iterator JE = RHS.end(); 1570 1571 // Skip ahead until the first place of potential sharing. 1572 if (I != IE && J != JE) { 1573 if (I->start < J->start) { 1574 I = std::upper_bound(I, IE, J->start); 1575 if (I != LHS.begin()) --I; 1576 } else if (J->start < I->start) { 1577 J = std::upper_bound(J, JE, I->start); 1578 if (J != RHS.begin()) --J; 1579 } 1580 } 1581 1582 while (I != IE && J != JE) { 1583 // Determine if these two live ranges overlap. 1584 bool Overlaps; 1585 if (I->start < J->start) { 1586 Overlaps = I->end > J->start; 1587 } else { 1588 Overlaps = J->end > I->start; 1589 } 1590 1591 // If so, check value # info to determine if they are really different. 1592 if (Overlaps) { 1593 // If the live range overlap will map to the same value number in the 1594 // result liverange, we can still coalesce them. If not, we can't. 1595 if (LHSValNoAssignments[I->valno->id] != 1596 RHSValNoAssignments[J->valno->id]) 1597 return false; 1598 } 1599 1600 if (I->end < J->end) 1601 ++I; 1602 else 1603 ++J; 1604 } 1605 1606 // Update kill info. Some live ranges are extended due to copy coalescing. 1607 for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), 1608 E = LHSValsDefinedFromRHS.end(); I != E; ++I) { 1609 VNInfo *VNI = I->first; 1610 unsigned LHSValID = LHSValNoAssignments[VNI->id]; 1611 if (VNI->hasPHIKill()) 1612 NewVNInfo[LHSValID]->setHasPHIKill(true); 1613 } 1614 1615 // Update kill info. Some live ranges are extended due to copy coalescing. 1616 for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(), 1617 E = RHSValsDefinedFromLHS.end(); I != E; ++I) { 1618 VNInfo *VNI = I->first; 1619 unsigned RHSValID = RHSValNoAssignments[VNI->id]; 1620 if (VNI->hasPHIKill()) 1621 NewVNInfo[RHSValID]->setHasPHIKill(true); 1622 } 1623 1624 if (LHSValNoAssignments.empty()) 1625 LHSValNoAssignments.push_back(-1); 1626 if (RHSValNoAssignments.empty()) 1627 RHSValNoAssignments.push_back(-1); 1628 1629 SmallVector<unsigned, 8> SourceRegisters; 1630 for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(), 1631 E = DupCopies.end(); I != E; ++I) { 1632 MachineInstr *MI = *I; 1633 1634 // We have pretended that the assignment to B in 1635 // A = X 1636 // B = X 1637 // was actually a copy from A. Now that we decided to coalesce A and B, 1638 // transform the code into 1639 // A = X 1640 // X = X 1641 // and mark the X as coalesced to keep the illusion. 1642 unsigned Src = MI->getOperand(1).getReg(); 1643 SourceRegisters.push_back(Src); 1644 MI->getOperand(0).substVirtReg(Src, 0, *TRI); 1645 1646 markAsJoined(MI); 1647 } 1648 1649 // If B = X was the last use of X in a liverange, we have to shrink it now 1650 // that B = X is gone. 1651 for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(), 1652 E = SourceRegisters.end(); I != E; ++I) { 1653 LIS->shrinkToUses(&LIS->getInterval(*I)); 1654 } 1655 1656 // If we get here, we know that we can coalesce the live ranges. Ask the 1657 // intervals to coalesce themselves now. 1658 LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, 1659 MRI); 1660 return true; 1661 } 1662 1663 namespace { 1664 // DepthMBBCompare - Comparison predicate that sort first based on the loop 1665 // depth of the basic block (the unsigned), and then on the MBB number. 1666 struct DepthMBBCompare { 1667 typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair; 1668 bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const { 1669 // Deeper loops first 1670 if (LHS.first != RHS.first) 1671 return LHS.first > RHS.first; 1672 1673 // Prefer blocks that are more connected in the CFG. This takes care of 1674 // the most difficult copies first while intervals are short. 1675 unsigned cl = LHS.second->pred_size() + LHS.second->succ_size(); 1676 unsigned cr = RHS.second->pred_size() + RHS.second->succ_size(); 1677 if (cl != cr) 1678 return cl > cr; 1679 1680 // As a last resort, sort by block number. 1681 return LHS.second->getNumber() < RHS.second->getNumber(); 1682 } 1683 }; 1684 } 1685 1686 void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB, 1687 std::vector<MachineInstr*> &TryAgain) { 1688 DEBUG(dbgs() << MBB->getName() << ":\n"); 1689 1690 SmallVector<MachineInstr*, 8> VirtCopies; 1691 SmallVector<MachineInstr*, 8> PhysCopies; 1692 SmallVector<MachineInstr*, 8> ImpDefCopies; 1693 for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); 1694 MII != E;) { 1695 MachineInstr *Inst = MII++; 1696 1697 // If this isn't a copy nor a extract_subreg, we can't join intervals. 1698 unsigned SrcReg, DstReg; 1699 if (Inst->isCopy()) { 1700 DstReg = Inst->getOperand(0).getReg(); 1701 SrcReg = Inst->getOperand(1).getReg(); 1702 } else if (Inst->isSubregToReg()) { 1703 DstReg = Inst->getOperand(0).getReg(); 1704 SrcReg = Inst->getOperand(2).getReg(); 1705 } else 1706 continue; 1707 1708 bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); 1709 bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); 1710 if (LIS->hasInterval(SrcReg) && LIS->getInterval(SrcReg).empty()) 1711 ImpDefCopies.push_back(Inst); 1712 else if (SrcIsPhys || DstIsPhys) 1713 PhysCopies.push_back(Inst); 1714 else 1715 VirtCopies.push_back(Inst); 1716 } 1717 1718 // Try coalescing implicit copies and insert_subreg <undef> first, 1719 // followed by copies to / from physical registers, then finally copies 1720 // from virtual registers to virtual registers. 1721 for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { 1722 MachineInstr *TheCopy = ImpDefCopies[i]; 1723 bool Again = false; 1724 if (!JoinCopy(TheCopy, Again)) 1725 if (Again) 1726 TryAgain.push_back(TheCopy); 1727 } 1728 for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) { 1729 MachineInstr *TheCopy = PhysCopies[i]; 1730 bool Again = false; 1731 if (!JoinCopy(TheCopy, Again)) 1732 if (Again) 1733 TryAgain.push_back(TheCopy); 1734 } 1735 for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) { 1736 MachineInstr *TheCopy = VirtCopies[i]; 1737 bool Again = false; 1738 if (!JoinCopy(TheCopy, Again)) 1739 if (Again) 1740 TryAgain.push_back(TheCopy); 1741 } 1742 } 1743 1744 void RegisterCoalescer::joinIntervals() { 1745 DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); 1746 1747 std::vector<MachineInstr*> TryAgainList; 1748 if (Loops->empty()) { 1749 // If there are no loops in the function, join intervals in function order. 1750 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); 1751 I != E; ++I) 1752 CopyCoalesceInMBB(I, TryAgainList); 1753 } else { 1754 // Otherwise, join intervals in inner loops before other intervals. 1755 // Unfortunately we can't just iterate over loop hierarchy here because 1756 // there may be more MBB's than BB's. Collect MBB's for sorting. 1757 1758 // Join intervals in the function prolog first. We want to join physical 1759 // registers with virtual registers before the intervals got too long. 1760 std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs; 1761 for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){ 1762 MachineBasicBlock *MBB = I; 1763 MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I)); 1764 } 1765 1766 // Sort by loop depth. 1767 std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); 1768 1769 // Finally, join intervals in loop nest order. 1770 for (unsigned i = 0, e = MBBs.size(); i != e; ++i) 1771 CopyCoalesceInMBB(MBBs[i].second, TryAgainList); 1772 } 1773 1774 // Joining intervals can allow other intervals to be joined. Iteratively join 1775 // until we make no progress. 1776 bool ProgressMade = true; 1777 while (ProgressMade) { 1778 ProgressMade = false; 1779 1780 for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { 1781 MachineInstr *&TheCopy = TryAgainList[i]; 1782 if (!TheCopy) 1783 continue; 1784 1785 bool Again = false; 1786 bool Success = JoinCopy(TheCopy, Again); 1787 if (Success || !Again) { 1788 TheCopy= 0; // Mark this one as done. 1789 ProgressMade = true; 1790 } 1791 } 1792 } 1793 } 1794 1795 void RegisterCoalescer::releaseMemory() { 1796 JoinedCopies.clear(); 1797 ReMatCopies.clear(); 1798 ReMatDefs.clear(); 1799 } 1800 1801 bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { 1802 MF = &fn; 1803 MRI = &fn.getRegInfo(); 1804 TM = &fn.getTarget(); 1805 TRI = TM->getRegisterInfo(); 1806 TII = TM->getInstrInfo(); 1807 LIS = &getAnalysis<LiveIntervals>(); 1808 LDV = &getAnalysis<LiveDebugVariables>(); 1809 AA = &getAnalysis<AliasAnalysis>(); 1810 Loops = &getAnalysis<MachineLoopInfo>(); 1811 1812 DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" 1813 << "********** Function: " 1814 << ((Value*)MF->getFunction())->getName() << '\n'); 1815 1816 if (VerifyCoalescing) 1817 MF->verify(this, "Before register coalescing"); 1818 1819 RegClassInfo.runOnMachineFunction(fn); 1820 1821 // Join (coalesce) intervals if requested. 1822 if (EnableJoining) { 1823 joinIntervals(); 1824 DEBUG({ 1825 dbgs() << "********** INTERVALS POST JOINING **********\n"; 1826 for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); 1827 I != E; ++I){ 1828 I->second->print(dbgs(), TRI); 1829 dbgs() << "\n"; 1830 } 1831 }); 1832 } 1833 1834 // Perform a final pass over the instructions and compute spill weights 1835 // and remove identity moves. 1836 SmallVector<unsigned, 4> DeadDefs, InflateRegs; 1837 for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); 1838 mbbi != mbbe; ++mbbi) { 1839 MachineBasicBlock* mbb = mbbi; 1840 for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); 1841 mii != mie; ) { 1842 MachineInstr *MI = mii; 1843 if (JoinedCopies.count(MI)) { 1844 // Delete all coalesced copies. 1845 bool DoDelete = true; 1846 assert(MI->isCopyLike() && "Unrecognized copy instruction"); 1847 unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); 1848 unsigned DstReg = MI->getOperand(0).getReg(); 1849 1850 // Collect candidates for register class inflation. 1851 if (TargetRegisterInfo::isVirtualRegister(SrcReg) && 1852 RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg))) 1853 InflateRegs.push_back(SrcReg); 1854 if (TargetRegisterInfo::isVirtualRegister(DstReg) && 1855 RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg))) 1856 InflateRegs.push_back(DstReg); 1857 1858 if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && 1859 MI->getNumOperands() > 2) 1860 // Do not delete extract_subreg, insert_subreg of physical 1861 // registers unless the definition is dead. e.g. 1862 // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 1863 // or else the scavenger may complain. LowerSubregs will 1864 // delete them later. 1865 DoDelete = false; 1866 1867 if (MI->allDefsAreDead()) { 1868 if (TargetRegisterInfo::isVirtualRegister(SrcReg) && 1869 LIS->hasInterval(SrcReg)) 1870 LIS->shrinkToUses(&LIS->getInterval(SrcReg)); 1871 DoDelete = true; 1872 } 1873 if (!DoDelete) { 1874 // We need the instruction to adjust liveness, so make it a KILL. 1875 if (MI->isSubregToReg()) { 1876 MI->RemoveOperand(3); 1877 MI->RemoveOperand(1); 1878 } 1879 MI->setDesc(TII->get(TargetOpcode::KILL)); 1880 mii = llvm::next(mii); 1881 } else { 1882 LIS->RemoveMachineInstrFromMaps(MI); 1883 mii = mbbi->erase(mii); 1884 ++numPeep; 1885 } 1886 continue; 1887 } 1888 1889 // Now check if this is a remat'ed def instruction which is now dead. 1890 if (ReMatDefs.count(MI)) { 1891 bool isDead = true; 1892 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1893 const MachineOperand &MO = MI->getOperand(i); 1894 if (!MO.isReg()) 1895 continue; 1896 unsigned Reg = MO.getReg(); 1897 if (!Reg) 1898 continue; 1899 DeadDefs.push_back(Reg); 1900 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 1901 // Remat may also enable register class inflation. 1902 if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg))) 1903 InflateRegs.push_back(Reg); 1904 } 1905 if (MO.isDead()) 1906 continue; 1907 if (TargetRegisterInfo::isPhysicalRegister(Reg) || 1908 !MRI->use_nodbg_empty(Reg)) { 1909 isDead = false; 1910 break; 1911 } 1912 } 1913 if (isDead) { 1914 while (!DeadDefs.empty()) { 1915 unsigned DeadDef = DeadDefs.back(); 1916 DeadDefs.pop_back(); 1917 RemoveDeadDef(LIS->getInterval(DeadDef), MI); 1918 } 1919 LIS->RemoveMachineInstrFromMaps(mii); 1920 mii = mbbi->erase(mii); 1921 continue; 1922 } else 1923 DeadDefs.clear(); 1924 } 1925 1926 ++mii; 1927 1928 // Check for now unnecessary kill flags. 1929 if (LIS->isNotInMIMap(MI)) continue; 1930 SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot(); 1931 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1932 MachineOperand &MO = MI->getOperand(i); 1933 if (!MO.isReg() || !MO.isKill()) continue; 1934 unsigned reg = MO.getReg(); 1935 if (!reg || !LIS->hasInterval(reg)) continue; 1936 if (!LIS->getInterval(reg).killedAt(DefIdx)) { 1937 MO.setIsKill(false); 1938 continue; 1939 } 1940 // When leaving a kill flag on a physreg, check if any subregs should 1941 // remain alive. 1942 if (!TargetRegisterInfo::isPhysicalRegister(reg)) 1943 continue; 1944 for (const unsigned *SR = TRI->getSubRegisters(reg); 1945 unsigned S = *SR; ++SR) 1946 if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx)) 1947 MI->addRegisterDefined(S, TRI); 1948 } 1949 } 1950 } 1951 1952 // After deleting a lot of copies, register classes may be less constrained. 1953 // Removing sub-register opreands may alow GR32_ABCD -> GR32 and DPR_VFP2 -> 1954 // DPR inflation. 1955 array_pod_sort(InflateRegs.begin(), InflateRegs.end()); 1956 InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), 1957 InflateRegs.end()); 1958 DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); 1959 for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { 1960 unsigned Reg = InflateRegs[i]; 1961 if (MRI->reg_nodbg_empty(Reg)) 1962 continue; 1963 if (MRI->recomputeRegClass(Reg, *TM)) { 1964 DEBUG(dbgs() << PrintReg(Reg) << " inflated to " 1965 << MRI->getRegClass(Reg)->getName() << '\n'); 1966 ++NumInflated; 1967 } 1968 } 1969 1970 DEBUG(dump()); 1971 DEBUG(LDV->dump()); 1972 if (VerifyCoalescing) 1973 MF->verify(this, "After register coalescing"); 1974 return true; 1975 } 1976 1977 /// print - Implement the dump method. 1978 void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { 1979 LIS->print(O, m); 1980 } 1981