1 //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the generic RegisterCoalescer interface which 11 // is used as the common interface used by all clients and 12 // implementations of register coalescing. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "regalloc" 17 #include "RegisterCoalescer.h" 18 #include "LiveDebugVariables.h" 19 #include "VirtRegMap.h" 20 21 #include "llvm/Pass.h" 22 #include "llvm/Value.h" 23 #include "llvm/ADT/OwningPtr.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallSet.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/Analysis/AliasAnalysis.h" 28 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 29 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 30 #include "llvm/CodeGen/LiveRangeEdit.h" 31 #include "llvm/CodeGen/MachineFrameInfo.h" 32 #include "llvm/CodeGen/MachineInstr.h" 33 #include "llvm/CodeGen/MachineInstr.h" 34 #include "llvm/CodeGen/MachineLoopInfo.h" 35 #include "llvm/CodeGen/MachineRegisterInfo.h" 36 #include "llvm/CodeGen/MachineRegisterInfo.h" 37 #include "llvm/CodeGen/Passes.h" 38 #include "llvm/CodeGen/RegisterClassInfo.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include "llvm/Target/TargetInstrInfo.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetOptions.h" 47 #include "llvm/Target/TargetRegisterInfo.h" 48 #include <algorithm> 49 #include <cmath> 50 using namespace llvm; 51 52 STATISTIC(numJoins , "Number of interval joins performed"); 53 
STATISTIC(numCrossRCs , "Number of cross class joins performed");
STATISTIC(numCommutes , "Number of instruction commuting performed");
STATISTIC(numExtends , "Number of copies extended");
STATISTIC(NumReMats , "Number of instructions re-materialized");
STATISTIC(NumInflated , "Number of register classes inflated");

// Command-line option "join-liveintervals"; initialised to true. The code
// that consults it is outside this section of the file.
static cl::opt<bool>
EnableJoining("join-liveintervals",
              cl::desc("Coalesce copies (default=true)"),
              cl::init(true));

// Hidden command-line option "verify-coalescing". The code that consults it
// is outside this section of the file.
static cl::opt<bool>
VerifyCoalescing("verify-coalescing",
         cl::desc("Verify machine instrs before and after register coalescing"),
         cl::Hidden);

namespace {
  /// RegisterCoalescer - MachineFunctionPass that removes register copies by
  /// joining the live intervals of their source and destination. It privately
  /// implements LiveRangeEdit::Delegate so that it is told (through
  /// LRE_WillEraseInstruction) about instructions erased on its behalf.
  class RegisterCoalescer : public MachineFunctionPass,
                            private LiveRangeEdit::Delegate {
    // Per-function context used by the member functions below. These are not
    // initialised by the constructor; presumably runOnMachineFunction() sets
    // them up (its body is not visible in this section).
    MachineFunction* MF;
    MachineRegisterInfo* MRI;
    const TargetMachine* TM;
    const TargetRegisterInfo* TRI;
    const TargetInstrInfo* TII;
    LiveIntervals *LIS;
    LiveDebugVariables *LDV;
    const MachineLoopInfo* Loops;
    AliasAnalysis *AA;
    RegisterClassInfo RegClassInfo;

    /// WorkList - Copy instructions yet to be coalesced.
    SmallVector<MachineInstr*, 8> WorkList;

    /// ErasedInstrs - Set of instruction pointers that have been erased, and
    /// that may be present in WorkList.
    SmallPtrSet<MachineInstr*, 8> ErasedInstrs;

    /// Dead instructions that are about to be deleted.
    SmallVector<MachineInstr*, 8> DeadDefs;

    /// Virtual registers to be considered for register class inflation.
    SmallVector<unsigned, 8> InflateRegs;

    /// Recursively eliminate dead defs in DeadDefs.
    void eliminateDeadDefs();

    /// LiveRangeEdit callback.
    void LRE_WillEraseInstruction(MachineInstr *MI);

    /// joinAllIntervals - join compatible live intervals
    void joinAllIntervals();

    /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting
    /// copies that cannot yet be coalesced into WorkList.
    void copyCoalesceInMBB(MachineBasicBlock *MBB);

    /// copyCoalesceWorkList - Try to coalesce all copies in WorkList after
    /// position From. Return true if any progress was made.
    bool copyCoalesceWorkList(unsigned From = 0);

    /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
    /// which are the src/dst of the copy instruction CopyMI. This returns
    /// true if the copy was successfully coalesced away. If it is not
    /// currently possible to coalesce this interval, but it may be possible if
    /// other things get coalesced, then it returns true by reference in
    /// 'Again'.
    bool joinCopy(MachineInstr *TheCopy, bool &Again);

    /// joinIntervals - Attempt to join these two intervals. On failure, this
    /// returns false. The output "SrcInt" will not have been modified, so we
    /// can use this information below to update aliases.
    bool joinIntervals(CoalescerPair &CP);

    /// Attempt joining with a reserved physreg.
    bool joinReservedPhysReg(CoalescerPair &CP);

    /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If
    /// the source value number is defined by a copy from the destination reg
    /// see if we can merge these two destination reg valno# into a single
    /// value number, eliminating a copy.
    bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);

    /// hasOtherReachingDefs - Return true if there are definitions of IntB
    /// other than BValNo val# that can reach uses of AValno val# of IntA.
    bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
                              VNInfo *AValNo, VNInfo *BValNo);

    /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy.
    /// If the source value number is defined by a commutable instruction and
    /// its other operand is coalesced to the copy dest register, see if we
    /// can transform the copy into a noop by commuting the definition.
    bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI);

    /// reMaterializeTrivialDef - If the source of a copy is defined by a
    /// trivial computation, replace the copy by rematerializing the
    /// definition.
    bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg,
                                 MachineInstr *CopyMI);

    /// canJoinPhys - Return true if a physreg copy should be joined.
    bool canJoinPhys(CoalescerPair &CP);

    /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
    /// update the subregister number if it is not zero. If DstReg is a
    /// physical register and the existing subregister number of the def / use
    /// being updated is not zero, make sure to set it to the correct physical
    /// subregister.
    void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);

    /// eliminateUndefCopy - Handle copies of undef values.
    bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP);

  public:
    static char ID; // Class identification, replacement for typeinfo
    RegisterCoalescer() : MachineFunctionPass(ID) {
      initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
    }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const;

    virtual void releaseMemory();

    /// runOnMachineFunction - pass entry point
    virtual bool runOnMachineFunction(MachineFunction&);

    /// print - Implement the dump method.
    virtual void print(raw_ostream &O, const Module* = 0) const;
  };
} /// end anonymous namespace

// Publish the pass identity through the llvm-namespace reference.
char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;

// Pass registration under the name "simple-register-coalescing", together with
// the analyses listed as dependencies.
INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
                      "Simple Register Coalescing", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
                    "Simple Register Coalescing", false, false)

char RegisterCoalescer::ID = 0;

/// compose - Combine two sub-register indices. An index of zero is treated as
/// "no sub-register": the other index is returned unchanged. Otherwise the
/// result comes from TargetRegisterInfo::composeSubRegIndices(a, b).
static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) {
  if (!a) return b;
  if (!b) return a;
  return tri.composeSubRegIndices(a, b);
}

/// isMoveInstr - Decode MI as a register-to-register move.
///
/// For a COPY the destination is operand 0 and the source is operand 1. For a
/// SUBREG_TO_REG the destination is operand 0 with its sub-register index
/// composed with the immediate in operand 3, and the source is operand 2.
///
/// Returns true and fills Src/Dst/SrcSub/DstSub for those two opcodes; returns
/// false and leaves the outputs untouched for anything else.
static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
                        unsigned &Src, unsigned &Dst,
                        unsigned &SrcSub, unsigned &DstSub) {
  if (MI->isCopy()) {
    Dst = MI->getOperand(0).getReg();
    DstSub = MI->getOperand(0).getSubReg();
    Src = MI->getOperand(1).getReg();
    SrcSub = MI->getOperand(1).getSubReg();
  } else if (MI->isSubregToReg()) {
    Dst = MI->getOperand(0).getReg();
    DstSub = compose(tri, MI->getOperand(0).getSubReg(),
                     MI->getOperand(3).getImm());
    Src = MI->getOperand(2).getReg();
    SrcSub = MI->getOperand(2).getSubReg();
  } else
    return false;
  return true;
}

/// setRegisters - Initialise this pair from the move instruction MI.
///
/// All state is reset first. Returns false (leaving SrcReg/DstReg zero) when
/// MI is not a move, when both registers are physical, or when no register /
/// register class satisfying both sides exists. On success SrcReg is always
/// virtual; if one side is physical it ends up as DstReg, and Flipped records
/// whether the roles were swapped relative to MI.
bool CoalescerPair::setRegisters(const MachineInstr *MI) {
  SrcReg = DstReg = 0;
  SrcIdx = DstIdx = 0;
  NewRC = 0;
  Flipped = CrossClass = false;

  unsigned Src, Dst, SrcSub, DstSub;
  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
    return false;
  Partial = SrcSub || DstSub;

  // If one register is a physreg, it must be Dst.
  if (TargetRegisterInfo::isPhysicalRegister(Src)) {
    if (TargetRegisterInfo::isPhysicalRegister(Dst))
      return false;
    std::swap(Src, Dst);
    std::swap(SrcSub, DstSub);
    Flipped = true;
  }

  const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo();

  if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
    // Eliminate DstSub on a physreg.
    if (DstSub) {
      Dst = TRI.getSubReg(Dst, DstSub);
      if (!Dst) return false;
      DstSub = 0;
    }

    // Eliminate SrcSub by picking a corresponding Dst superregister.
    if (SrcSub) {
      Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
      if (!Dst) return false;
      SrcSub = 0;
    } else if (!MRI.getRegClass(Src)->contains(Dst)) {
      return false;
    }
  } else {
    // Both registers are virtual.
    const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);

    // Both registers have subreg indices.
    if (SrcSub && DstSub) {
      // Copies between different sub-registers are never coalescable.
      if (Src == Dst && SrcSub != DstSub)
        return false;

      NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
                                         SrcIdx, DstIdx);
      if (!NewRC)
        return false;
    } else if (DstSub) {
      // SrcReg will be merged with a sub-register of DstReg.
      SrcIdx = DstSub;
      NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
    } else if (SrcSub) {
      // DstReg will be merged with a sub-register of SrcReg.
      DstIdx = SrcSub;
      NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
    } else {
      // This is a straight copy without sub-registers.
      NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
    }

    // The combined constraint may be impossible to satisfy.
    if (!NewRC)
      return false;

    // Prefer SrcReg to be a sub-register of DstReg.
    // FIXME: Coalescer should support subregs symmetrically.
    if (DstIdx && !SrcIdx) {
      std::swap(Src, Dst);
      std::swap(SrcIdx, DstIdx);
      Flipped = !Flipped;
    }

    // The join changes a register class if NewRC differs from either input.
    CrossClass = NewRC != DstRC || NewRC != SrcRC;
  }
  // Check our invariants
  assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
  assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
         "Cannot have a physical SubIdx");
  SrcReg = Src;
  DstReg = Dst;
  return true;
}

/// flip - Swap the roles of SrcReg and DstReg (and their sub-register indices)
/// and toggle Flipped. Returns false without changing anything when DstReg is
/// a physical register.
bool CoalescerPair::flip() {
  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
    return false;
  std::swap(SrcReg, DstReg);
  std::swap(SrcIdx, DstIdx);
  Flipped = !Flipped;
  return true;
}

/// isCoalescable - Return true if MI is a move between the same registers
/// (and matching sub-registers) that this pair describes, in either
/// direction. A null MI or a non-move instruction yields false.
bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
  if (!MI)
    return false;
  unsigned Src, Dst, SrcSub, DstSub;
  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
    return false;

  // Find the virtual register that is SrcReg.
  if (Dst == SrcReg) {
    std::swap(Src, Dst);
    std::swap(SrcSub, DstSub);
  } else if (Src != SrcReg) {
    return false;
  }

  // Now check that Dst matches DstReg.
  if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
    if (!TargetRegisterInfo::isPhysicalRegister(Dst))
      return false;
    assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
    // DstSub could be set for a physreg from INSERT_SUBREG.
    if (DstSub)
      Dst = TRI.getSubReg(Dst, DstSub);
    // Full copy of Src.
    if (!SrcSub)
      return DstReg == Dst;
    // This is a partial register copy. Check that the parts match.
    return TRI.getSubReg(DstReg, SrcSub) == Dst;
  } else {
    // DstReg is virtual.
    if (DstReg != Dst)
      return false;
    // Registers match, do the subregisters line up?
    return compose(TRI, SrcIdx, SrcSub) == compose(TRI, DstIdx, DstSub);
  }
}

/// getAnalysisUsage - Declare the analyses this pass requires and preserves.
/// The pass preserves the CFG.
void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<AliasAnalysis>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addRequired<LiveDebugVariables>();
  AU.addPreserved<LiveDebugVariables>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addPreservedID(MachineDominatorsID);
  MachineFunctionPass::getAnalysisUsage(AU);
}

/// eliminateDeadDefs - Hand the instructions queued in DeadDefs to a temporary
/// LiveRangeEdit, passing this pass as its delegate so that
/// LRE_WillEraseInstruction() is called back for erased instructions.
void RegisterCoalescer::eliminateDeadDefs() {
  SmallVector<LiveInterval*, 8> NewRegs;
  LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs);
}

// Callback from eliminateDeadDefs().
void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
  // MI may be in WorkList. Make sure we don't visit it.
  ErasedInstrs.insert(MI);
}

/// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA
/// being the source and IntB being the dest, thus this defines a value number
/// in IntB. If the source value number (in IntA) is defined by a copy from B,
/// see if we can merge these two pieces of B into a single value number,
/// eliminating a copy. For example:
///
///  A3 = B0
///    ...
///  B1 = A3      <- this copy
///
/// In this case, B0 can be extended to where the B1 copy lives, allowing the B1
/// value number to be replaced with B0 (which simplifies the B liveinterval).
///
/// This returns true if an interval was modified.
///
bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
                                             MachineInstr *CopyMI) {
  assert(!CP.isPartial() && "This doesn't work for partial copies.");
  assert(!CP.isPhys() && "This doesn't work for physreg copies.");

  // CP may have swapped the roles; IntA is always the interval CopyMI reads
  // and IntB the one it writes.
  LiveInterval &IntA =
    LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
  LiveInterval &IntB =
    LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();

  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
  // the example above.
  LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx);
  if (BLR == IntB.end()) return false;
  VNInfo *BValNo = BLR->valno;

  // Get the location that B is defined at. Two options: either this value has
  // an unknown definition point or it is defined at CopyIdx. If unknown, we
  // can't process it.
  if (BValNo->def != CopyIdx) return false;

  // AValNo is the value number in A that defines the copy, A3 in the example.
  SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
  LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx);
  // The live range might not exist after fun with physreg coalescing.
  if (ALR == IntA.end()) return false;
  VNInfo *AValNo = ALR->valno;

  // If AValNo is defined as a copy from IntB, we can potentially process this.
  // Get the instruction that defines this value number.
  // (isCoalescable() also rejects a null ACopyMI.)
  MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
  if (!CP.isCoalescable(ACopyMI))
    return false;

  // Get the LiveRange in IntB that this value number starts with.
  LiveInterval::iterator ValLR =
    IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot());
  if (ValLR == IntB.end())
    return false;

  // Make sure that the end of the live range is inside the same block as
  // CopyMI.
  MachineInstr *ValLREndInst =
    LIS->getInstructionFromIndex(ValLR->end.getPrevSlot());
  if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent())
    return false;

  // Okay, we now know that ValLR ends in the same block that the CopyMI
  // live-range starts. If there are no intervening live ranges between them in
  // IntB, we can merge them.
  if (ValLR+1 != BLR) return false;

  DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI));

  SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start;
  // We are about to delete CopyMI, so need to remove it as the 'instruction
  // that defines this value #'. Update the valnum with the new defining
  // instruction #.
  BValNo->def = FillerStart;

  // Okay, we can merge them. We need to insert a new liverange:
  // [ValLR.end, BLR.begin) of either value number, then we merge the
  // two value numbers.
  // NOTE(review): ValLR and BLR are iterators into IntB and are used again
  // after this addRange() call - confirm addRange() keeps them valid here.
  IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));

  // Okay, merge "B1" into the same value number as "B0".
  if (BValNo != ValLR->valno) {
    // If B1 is killed by a PHI, then the merged live range must also be killed
    // by the same PHI, as B0 and B1 can not overlap.
    bool HasPHIKill = BValNo->hasPHIKill();
    IntB.MergeValueNumberInto(BValNo, ValLR->valno);
    if (HasPHIKill)
      ValLR->valno->setHasPHIKill(true);
  }
  DEBUG(dbgs() << " result = " << IntB << '\n');

  // If the source instruction was killing the source register before the
  // merge, unset the isKill marker given the live range has been extended.
  int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true);
  if (UIdx != -1) {
    ValLREndInst->getOperand(UIdx).setIsKill(false);
  }

  // Rewrite the copy. If the copy instruction was killing the destination
  // register before the merge, find the last use and trim the live range. That
  // will also add the isKill marker.
  CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
  if (ALR->end == CopyIdx)
    LIS->shrinkToUses(&IntA);

  ++numExtends;
  return true;
}

/// hasOtherReachingDefs - Return true if there are definitions of IntB
/// other than BValNo val# that can reach uses of AValno val# of IntA.
bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
                                             LiveInterval &IntB,
                                             VNInfo *AValNo,
                                             VNInfo *BValNo) {
  // Visit every live range of IntA that carries AValNo and look for an
  // overlapping range of IntB whose value number is not BValNo.
  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
       AI != AE; ++AI) {
    if (AI->valno != AValNo) continue;
    // Start at the IntB range just before the first one that compares greater
    // than AI->start (presumably ranges are ordered by start - the comparison
    // operator is defined outside this file section).
    LiveInterval::Ranges::iterator BI =
      std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start);
    if (BI != IntB.ranges.begin())
      --BI;
    for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) {
      if (BI->valno == BValNo)
        continue;
      // BI begins at or before AI and is still live at AI's start.
      if (BI->start <= AI->start && BI->end > AI->start)
        return true;
      // BI begins strictly inside AI.
      if (BI->start > AI->start && BI->start < AI->end)
        return true;
    }
  }
  return false;
}

/// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with
/// IntA being the source and IntB being the dest, thus this defines a value
/// number in IntB. If the source value number (in IntA) is defined by a
/// commutable instruction and its other operand is coalesced to the copy dest
/// register, see if we can transform the copy into a noop by commuting the
/// definition. For example,
///
///  A3 = op A2 B0<kill>
///    ...
///  B1 = A3      <- this copy
///    ...
///     = op A3   <- more uses
///
/// ==>
///
///  B2 = op B0 A2<kill>
///    ...
///  B1 = B2      <- now an identity copy
///    ...
///     = op B2   <- more uses
///
/// This returns true if an interval was modified.
///
bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
                                                 MachineInstr *CopyMI) {
  assert (!CP.isPhys());

  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot();

  // CP may have swapped the roles; IntA is always the interval CopyMI reads
  // and IntB the one it writes.
  LiveInterval &IntA =
    LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
  LiveInterval &IntB =
    LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());

  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
  // the example above.
  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
  if (!BValNo || BValNo->def != CopyIdx)
    return false;

  // NOTE(review): this assert repeats the condition already checked by the
  // 'if' directly above, so it can never fire.
  assert(BValNo->def == CopyIdx && "Copy doesn't define the value?");

  // AValNo is the value number in A that defines the copy, A3 in the example.
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
  assert(AValNo && "COPY source not live");

  // If other defs can reach uses of this def, then it's not safe to perform
  // the optimization.
  if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill())
    return false;
  MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
  if (!DefMI)
    return false;
  if (!DefMI->isCommutable())
    return false;
  // If DefMI is a two-address instruction then commuting it will change the
  // destination register.
  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
  assert(DefIdx != -1);
  unsigned UseOpIdx;
  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
    return false;
  // One of the two commutable operands must be the use tied to the def; the
  // other one becomes the new destination after commuting.
  unsigned Op1, Op2, NewDstIdx;
  if (!TII->findCommutedOpIndices(DefMI, Op1, Op2))
    return false;
  if (Op1 == UseOpIdx)
    NewDstIdx = Op2;
  else if (Op2 == UseOpIdx)
    NewDstIdx = Op1;
  else
    return false;

  // The other operand must be IntB's register and must be killed by DefMI.
  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
  unsigned NewReg = NewDstMO.getReg();
  if (NewReg != IntB.reg || !NewDstMO.isKill())
    return false;

  // Make sure there are no other definitions of IntB that would reach the
  // uses which the new definition can reach.
  if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
    return false;

  // If some of the uses of IntA.reg is already coalesced away, return false.
  // It's not possible to determine whether it's safe to perform the coalescing.
  for (MachineRegisterInfo::use_nodbg_iterator UI =
         MRI->use_nodbg_begin(IntA.reg),
       UE = MRI->use_nodbg_end(); UI != UE; ++UI) {
    MachineInstr *UseMI = &*UI;
    SlotIndex UseIdx = LIS->getInstructionIndex(UseMI);
    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
    if (ULR == IntA.end() || ULR->valno != AValNo)
      continue;
    // If this use is tied to a def, we can't rewrite the register.
    if (UseMI->isRegTiedToDefOperand(UI.getOperandNo()))
      return false;
  }

  DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
               << *DefMI);

  // At this point we have decided that it is legal to do this
  // transformation. Start by commuting the instruction.
  MachineBasicBlock *MBB = DefMI->getParent();
  MachineInstr *NewMI = TII->commuteInstruction(DefMI);
  if (!NewMI)
    return false;
  // NOTE(review): commuteInstruction() has already been called when this
  // check runs; if constrainRegClass() fails we return false without undoing
  // it - confirm that this early return is intended.
  if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
      TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
      !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
    return false;
  // If the target returned a different instruction, put it in DefMI's place
  // in both the slot-index maps and the basic block.
  if (NewMI != DefMI) {
    LIS->ReplaceMachineInstrInMaps(DefMI, NewMI);
    MachineBasicBlock::iterator Pos = DefMI;
    MBB->insert(Pos, NewMI);
    MBB->erase(DefMI);
  }
  // NOTE(review): the index is used without checking for a "not found"
  // result from findRegisterUseOperandIdx().
  unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false);
  NewMI->getOperand(OpIdx).setIsKill();

  // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g.
  // A = or A, B
  // ...
  // B = A
  // ...
  // C = A<kill>
  // ...
  //   = B

  // Update uses of IntA of the specific Val# with IntB.
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
         UE = MRI->use_end(); UI != UE;) {
    MachineOperand &UseMO = UI.getOperand();
    MachineInstr *UseMI = &*UI;
    // Advance before the operand is rewritten or the instruction erased.
    ++UI;
    if (UseMI->isDebugValue()) {
      // FIXME These don't have an instruction index. Not clear we have enough
      // info to decide whether to do this replacement or not. For now do it.
      UseMO.setReg(NewReg);
      continue;
    }
    SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true);
    LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx);
    if (ULR == IntA.end() || ULR->valno != AValNo)
      continue;
    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
      UseMO.substPhysReg(NewReg, *TRI);
    else
      UseMO.setReg(NewReg);
    // Only full copies into IntB.reg other than CopyMI itself are candidates
    // for removal below.
    if (UseMI == CopyMI)
      continue;
    if (!UseMI->isCopy())
      continue;
    if (UseMI->getOperand(0).getReg() != IntB.reg ||
        UseMI->getOperand(0).getSubReg())
      continue;

    // This copy will become a noop. If it's defining a new val#, merge it into
    // BValNo.
    SlotIndex DefIdx = UseIdx.getRegSlot();
    VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
    if (!DVNI)
      continue;
    DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
    assert(DVNI->def == DefIdx);
    BValNo = IntB.MergeValueNumberInto(BValNo, DVNI);
    // Remember the erased copy so a stale WorkList entry is not visited.
    ErasedInstrs.insert(UseMI);
    LIS->RemoveMachineInstrFromMaps(UseMI);
    UseMI->eraseFromParent();
  }

  // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition
  // is updated.
  VNInfo *ValNo = BValNo;
  ValNo->def = AValNo->def;
  for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end();
       AI != AE; ++AI) {
    if (AI->valno != AValNo) continue;
    IntB.addRange(LiveRange(AI->start, AI->end, ValNo));
  }
  DEBUG(dbgs() << "\t\textended: " << IntB << '\n');

  IntA.removeValNo(AValNo);
  DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n');
  ++numCommutes;
  return true;
}

/// reMaterializeTrivialDef - If the source of a copy is defined by a trivial
/// computation, replace the copy by rematerializing the definition.
705 bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, 706 unsigned DstReg, 707 MachineInstr *CopyMI) { 708 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); 709 LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); 710 assert(SrcLR != SrcInt.end() && "Live range not found!"); 711 VNInfo *ValNo = SrcLR->valno; 712 if (ValNo->isPHIDef() || ValNo->isUnused()) 713 return false; 714 MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); 715 if (!DefMI) 716 return false; 717 assert(DefMI && "Defining instruction disappeared"); 718 if (!DefMI->isAsCheapAsAMove()) 719 return false; 720 if (!TII->isTriviallyReMaterializable(DefMI, AA)) 721 return false; 722 bool SawStore = false; 723 if (!DefMI->isSafeToMove(TII, AA, SawStore)) 724 return false; 725 const MCInstrDesc &MCID = DefMI->getDesc(); 726 if (MCID.getNumDefs() != 1) 727 return false; 728 if (!DefMI->isImplicitDef()) { 729 // Make sure the copy destination register class fits the instruction 730 // definition register class. The mismatch can happen as a result of earlier 731 // extract_subreg, insert_subreg, subreg_to_reg coalescing. 732 const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF); 733 if (TargetRegisterInfo::isVirtualRegister(DstReg)) { 734 if (MRI->getRegClass(DstReg) != RC) 735 return false; 736 } else if (!RC->contains(DstReg)) 737 return false; 738 } 739 740 MachineBasicBlock *MBB = CopyMI->getParent(); 741 MachineBasicBlock::iterator MII = 742 llvm::next(MachineBasicBlock::iterator(CopyMI)); 743 TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); 744 MachineInstr *NewMI = prior(MII); 745 746 // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). 747 // We need to remember these so we can add intervals once we insert 748 // NewMI into SlotIndexes. 
749 SmallVector<unsigned, 4> NewMIImplDefs; 750 for (unsigned i = NewMI->getDesc().getNumOperands(), 751 e = NewMI->getNumOperands(); i != e; ++i) { 752 MachineOperand &MO = NewMI->getOperand(i); 753 if (MO.isReg()) { 754 assert(MO.isDef() && MO.isImplicit() && MO.isDead() && 755 TargetRegisterInfo::isPhysicalRegister(MO.getReg())); 756 NewMIImplDefs.push_back(MO.getReg()); 757 } 758 } 759 760 // CopyMI may have implicit operands, transfer them over to the newly 761 // rematerialized instruction. And update implicit def interval valnos. 762 for (unsigned i = CopyMI->getDesc().getNumOperands(), 763 e = CopyMI->getNumOperands(); i != e; ++i) { 764 MachineOperand &MO = CopyMI->getOperand(i); 765 if (MO.isReg()) { 766 assert(MO.isImplicit() && "No explicit operands after implict operands."); 767 // Discard VReg implicit defs. 768 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 769 NewMI->addOperand(MO); 770 } 771 } 772 } 773 774 LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); 775 776 SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); 777 for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { 778 unsigned Reg = NewMIImplDefs[i]; 779 for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) 780 if (LiveInterval *LI = LIS->getCachedRegUnit(*Units)) 781 LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); 782 } 783 784 CopyMI->eraseFromParent(); 785 ErasedInstrs.insert(CopyMI); 786 DEBUG(dbgs() << "Remat: " << *NewMI); 787 ++NumReMats; 788 789 // The source interval can become smaller because we removed a use. 790 LIS->shrinkToUses(&SrcInt, &DeadDefs); 791 if (!DeadDefs.empty()) 792 eliminateDeadDefs(); 793 794 return true; 795 } 796 797 /// eliminateUndefCopy - ProcessImpicitDefs may leave some copies of <undef> 798 /// values, it only removes local variables. When we have a copy like: 799 /// 800 /// %vreg1 = COPY %vreg2<undef> 801 /// 802 /// We delete the copy and remove the corresponding value number from %vreg1. 
/// Any uses of that value number are marked as <undef>.
bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI,
                                           const CoalescerPair &CP) {
  // The copy is only treated as undef when neither register of the pair is
  // live at the copy's index.
  SlotIndex Idx = LIS->getInstructionIndex(CopyMI);
  LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg());
  if (SrcInt->liveAt(Idx))
    return false;
  LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg());
  if (DstInt->liveAt(Idx))
    return false;

  // No intervals are live-in to CopyMI - it is undef.
  // When CP is flipped, the register CopyMI defines is CP's SrcReg, so use
  // that interval as DstInt from here on. SrcInt is not needed any more.
  if (CP.isFlipped())
    DstInt = SrcInt;
  SrcInt = 0;

  // Drop the value that CopyMI defines.
  VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot());
  assert(DeadVNI && "No value defined in DstInt");
  DstInt->removeValNo(DeadVNI);

  // Find new undef uses.
  for (MachineRegisterInfo::reg_nodbg_iterator
         I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end();
       I != E; ++I) {
    MachineOperand &MO = I.getOperand();
    if (MO.isDef() || MO.isUndef())
      continue;
    MachineInstr *MI = MO.getParent();
    // Note: this Idx shadows the outer one; it is the index of the using
    // instruction.
    SlotIndex Idx = LIS->getInstructionIndex(MI);
    if (DstInt->liveAt(Idx))
      continue;
    MO.setIsUndef(true);
    DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI);
  }
  // The caller is left to remove CopyMI itself; it is not erased here.
  return true;
}

/// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and
/// update the subregister number if it is not zero. If DstReg is a
/// physical register and the existing subregister number of the def / use
/// being updated is not zero, make sure to set it to the correct physical
/// subregister.
void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
                                          unsigned DstReg,
                                          unsigned SubIdx) {
  bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
  // Only a virtual DstReg has its interval looked up here.
  LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg);

  // Update LiveDebugVariables.
  LDV->renameRegister(SrcReg, DstReg, SubIdx);

  // Visit each instruction that mentions SrcReg once; skipInstruction()
  // returns the current instruction and moves the iterator past all of its
  // operands, and the loop ends when it returns null.
  for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg);
       MachineInstr *UseMI = I.skipInstruction();) {
    SmallVector<unsigned,8> Ops;
    bool Reads, Writes;
    tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);

    // If SrcReg wasn't read, it may still be the case that DstReg is live-in
    // because SrcReg is a sub-register.
    if (DstInt && !Reads && SubIdx)
      Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI));

    // Replace SrcReg with DstReg in all UseMI operands.
    for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
      MachineOperand &MO = UseMI->getOperand(Ops[i]);

      // Adjust <undef> flags in case of sub-register joins. We don't want to
      // turn a full def into a read-modify-write sub-register def and vice
      // versa.
      if (SubIdx && MO.isDef())
        MO.setIsUndef(!Reads);

      if (DstIsPhys)
        MO.substPhysReg(DstReg, *TRI);
      else
        MO.substVirtReg(DstReg, SubIdx, *TRI);
    }

    DEBUG({
        dbgs() << "\t\tupdated: ";
        if (!UseMI->isDebugValue())
          dbgs() << LIS->getInstructionIndex(UseMI) << "\t";
        dbgs() << *UseMI;
      });
  }
}

/// canJoinPhys - Return true if a copy involving a physreg should be joined.
bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) {
  /// Always join simple intervals that are defined by a single copy from a
  /// reserved register. This doesn't increase register pressure, so it is
  /// always beneficial.
  if (!RegClassInfo.isReserved(CP.getDstReg())) {
    DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
    return false;
  }

  // Accept only a flipped pair whose virtual register interval holds exactly
  // one value.
  LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
  if (CP.isFlipped() && JoinVInt.containsOneValue())
    return true;

  DEBUG(dbgs() << "\tCannot join defs into reserved register.\n");
  return false;
}

/// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg,
/// which are the src/dst of the copy instruction CopyMI. This returns true
/// if the copy was successfully coalesced away. If it is not currently
/// possible to coalesce this interval, but it may be possible if other
/// things get coalesced, then it returns true by reference in 'Again'.
bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {

  Again = false;
  DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI);

  CoalescerPair CP(*TRI);
  if (!CP.setRegisters(CopyMI)) {
    DEBUG(dbgs() << "\tNot coalescable.\n");
    return false;
  }

  // Dead code elimination. This really should be handled by MachineDCE, but
  // sometimes dead copies slip through, and we can't generate invalid live
  // ranges.
  if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
    DEBUG(dbgs() << "\tCopy is dead.\n");
    DeadDefs.push_back(CopyMI);
    eliminateDeadDefs();
    return true;
  }

  // Eliminate undefs.
  if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) {
    DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n");
    LIS->RemoveMachineInstrFromMaps(CopyMI);
    CopyMI->eraseFromParent();
    return false; // Not coalescable.
  }

  // Coalesced copies are normally removed immediately, but transformations
  // like removeCopyByCommutingDef() can inadvertently create identity copies.
  // When that happens, just join the values and remove the copy.
945 if (CP.getSrcReg() == CP.getDstReg()) { 946 LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); 947 DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); 948 LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI)); 949 if (VNInfo *DefVNI = LRQ.valueDefined()) { 950 VNInfo *ReadVNI = LRQ.valueIn(); 951 assert(ReadVNI && "No value before copy and no <undef> flag."); 952 assert(ReadVNI != DefVNI && "Cannot read and define the same value."); 953 LI.MergeValueNumberInto(DefVNI, ReadVNI); 954 DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); 955 } 956 LIS->RemoveMachineInstrFromMaps(CopyMI); 957 CopyMI->eraseFromParent(); 958 return true; 959 } 960 961 // Enforce policies. 962 if (CP.isPhys()) { 963 DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) 964 << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) 965 << '\n'); 966 if (!canJoinPhys(CP)) { 967 // Before giving up coalescing, if definition of source is defined by 968 // trivial computation, try rematerializing it. 969 if (!CP.isFlipped() && 970 reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), 971 CP.getDstReg(), CopyMI)) 972 return true; 973 return false; 974 } 975 } else { 976 DEBUG({ 977 dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName() 978 << " with "; 979 if (CP.getDstIdx() && CP.getSrcIdx()) 980 dbgs() << PrintReg(CP.getDstReg()) << " in " 981 << TRI->getSubRegIndexName(CP.getDstIdx()) << " and " 982 << PrintReg(CP.getSrcReg()) << " in " 983 << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n'; 984 else 985 dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in " 986 << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; 987 }); 988 989 // When possible, let DstReg be the larger interval. 990 if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() > 991 LIS->getInterval(CP.getDstReg()).ranges.size()) 992 CP.flip(); 993 } 994 995 // Okay, attempt to join these two intervals. On failure, this returns false. 
996 // Otherwise, if one of the intervals being joined is a physreg, this method 997 // always canonicalizes DstInt to be it. The output "SrcInt" will not have 998 // been modified, so we can use this information below to update aliases. 999 if (!joinIntervals(CP)) { 1000 // Coalescing failed. 1001 1002 // If definition of source is defined by trivial computation, try 1003 // rematerializing it. 1004 if (!CP.isFlipped() && 1005 reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), 1006 CP.getDstReg(), CopyMI)) 1007 return true; 1008 1009 // If we can eliminate the copy without merging the live ranges, do so now. 1010 if (!CP.isPartial() && !CP.isPhys()) { 1011 if (adjustCopiesBackFrom(CP, CopyMI) || 1012 removeCopyByCommutingDef(CP, CopyMI)) { 1013 LIS->RemoveMachineInstrFromMaps(CopyMI); 1014 CopyMI->eraseFromParent(); 1015 DEBUG(dbgs() << "\tTrivial!\n"); 1016 return true; 1017 } 1018 } 1019 1020 // Otherwise, we are unable to join the intervals. 1021 DEBUG(dbgs() << "\tInterference!\n"); 1022 Again = true; // May be possible to coalesce later. 1023 return false; 1024 } 1025 1026 // Coalescing to a virtual register that is of a sub-register class of the 1027 // other. Make sure the resulting register is set to the right register class. 1028 if (CP.isCrossClass()) { 1029 ++numCrossRCs; 1030 MRI->setRegClass(CP.getDstReg(), CP.getNewRC()); 1031 } 1032 1033 // Removing sub-register copies can ease the register class constraints. 1034 // Make sure we attempt to inflate the register class of DstReg. 1035 if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC())) 1036 InflateRegs.push_back(CP.getDstReg()); 1037 1038 // CopyMI has been erased by joinIntervals at this point. Remove it from 1039 // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back 1040 // to the work list. This keeps ErasedInstrs from growing needlessly. 1041 ErasedInstrs.erase(CopyMI); 1042 1043 // Rewrite all SrcReg operands to DstReg. 
1044 // Also update DstReg operands to include DstIdx if it is set. 1045 if (CP.getDstIdx()) 1046 updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); 1047 updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); 1048 1049 // SrcReg is guaranteed to be the register whose live interval that is 1050 // being merged. 1051 LIS->removeInterval(CP.getSrcReg()); 1052 1053 // Update regalloc hint. 1054 TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); 1055 1056 DEBUG({ 1057 dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI); 1058 if (!CP.isPhys()) 1059 dbgs() << LIS->getInterval(CP.getDstReg()); 1060 dbgs() << '\n'; 1061 }); 1062 1063 ++numJoins; 1064 return true; 1065 } 1066 1067 /// Attempt joining with a reserved physreg. 1068 bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { 1069 assert(CP.isPhys() && "Must be a physreg copy"); 1070 assert(RegClassInfo.isReserved(CP.getDstReg()) && "Not a reserved register"); 1071 LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); 1072 DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS 1073 << '\n'); 1074 1075 assert(CP.isFlipped() && RHS.containsOneValue() && 1076 "Invalid join with reserved register"); 1077 1078 // Optimization for reserved registers like ESP. We can only merge with a 1079 // reserved physreg if RHS has a single value that is a copy of CP.DstReg(). 1080 // The live range of the reserved register will look like a set of dead defs 1081 // - we don't properly track the live range of reserved registers. 1082 1083 // Deny any overlapping intervals. This depends on all the reserved 1084 // register live ranges to look like dead defs. 
1085 for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI) 1086 if (RHS.overlaps(LIS->getRegUnit(*UI))) { 1087 DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); 1088 return false; 1089 } 1090 1091 // Skip any value computations, we are not adding new values to the 1092 // reserved register. Also skip merging the live ranges, the reserved 1093 // register live range doesn't need to be accurate as long as all the 1094 // defs are there. 1095 1096 // We don't track kills for reserved registers. 1097 MRI->clearKillFlags(CP.getSrcReg()); 1098 1099 return true; 1100 } 1101 1102 /// ComputeUltimateVN - Assuming we are going to join two live intervals, 1103 /// compute what the resultant value numbers for each value in the input two 1104 /// ranges will be. This is complicated by copies between the two which can 1105 /// and will commonly cause multiple value numbers to be merged into one. 1106 /// 1107 /// VN is the value number that we're trying to resolve. InstDefiningValue 1108 /// keeps track of the new InstDefiningValue assignment for the result 1109 /// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of 1110 /// whether a value in this or other is a copy from the opposite set. 1111 /// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have 1112 /// already been assigned. 1113 /// 1114 /// ThisFromOther[x] - If x is defined as a copy from the other interval, this 1115 /// contains the value number the copy is from. 1116 /// 1117 static unsigned ComputeUltimateVN(VNInfo *VNI, 1118 SmallVector<VNInfo*, 16> &NewVNInfo, 1119 DenseMap<VNInfo*, VNInfo*> &ThisFromOther, 1120 DenseMap<VNInfo*, VNInfo*> &OtherFromThis, 1121 SmallVector<int, 16> &ThisValNoAssignments, 1122 SmallVector<int, 16> &OtherValNoAssignments) { 1123 unsigned VN = VNI->id; 1124 1125 // If the VN has already been computed, just return it. 
1126 if (ThisValNoAssignments[VN] >= 0) 1127 return ThisValNoAssignments[VN]; 1128 assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); 1129 1130 // If this val is not a copy from the other val, then it must be a new value 1131 // number in the destination. 1132 DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI); 1133 if (I == ThisFromOther.end()) { 1134 NewVNInfo.push_back(VNI); 1135 return ThisValNoAssignments[VN] = NewVNInfo.size()-1; 1136 } 1137 VNInfo *OtherValNo = I->second; 1138 1139 // Otherwise, this *is* a copy from the RHS. If the other side has already 1140 // been computed, return it. 1141 if (OtherValNoAssignments[OtherValNo->id] >= 0) 1142 return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; 1143 1144 // Mark this value number as currently being computed, then ask what the 1145 // ultimate value # of the other value is. 1146 ThisValNoAssignments[VN] = -2; 1147 unsigned UltimateVN = 1148 ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther, 1149 OtherValNoAssignments, ThisValNoAssignments); 1150 return ThisValNoAssignments[VN] = UltimateVN; 1151 } 1152 1153 1154 // Find out if we have something like 1155 // A = X 1156 // B = X 1157 // if so, we can pretend this is actually 1158 // A = X 1159 // B = A 1160 // which allows us to coalesce A and B. 1161 // VNI is the definition of B. LR is the life range of A that includes 1162 // the slot just before B. If we return true, we add "B = X" to DupCopies. 1163 // This implies that A dominates B. 1164 static bool RegistersDefinedFromSameValue(LiveIntervals &li, 1165 const TargetRegisterInfo &tri, 1166 CoalescerPair &CP, 1167 VNInfo *VNI, 1168 VNInfo *OtherVNI, 1169 SmallVector<MachineInstr*, 8> &DupCopies) { 1170 // FIXME: This is very conservative. For example, we don't handle 1171 // physical registers. 
1172 1173 MachineInstr *MI = li.getInstructionFromIndex(VNI->def); 1174 1175 if (!MI || CP.isPartial() || CP.isPhys()) 1176 return false; 1177 1178 unsigned A = CP.getDstReg(); 1179 if (!TargetRegisterInfo::isVirtualRegister(A)) 1180 return false; 1181 1182 unsigned B = CP.getSrcReg(); 1183 if (!TargetRegisterInfo::isVirtualRegister(B)) 1184 return false; 1185 1186 MachineInstr *OtherMI = li.getInstructionFromIndex(OtherVNI->def); 1187 if (!OtherMI) 1188 return false; 1189 1190 if (MI->isImplicitDef()) { 1191 DupCopies.push_back(MI); 1192 return true; 1193 } else { 1194 if (!MI->isFullCopy()) 1195 return false; 1196 unsigned Src = MI->getOperand(1).getReg(); 1197 if (!TargetRegisterInfo::isVirtualRegister(Src)) 1198 return false; 1199 if (!OtherMI->isFullCopy()) 1200 return false; 1201 unsigned OtherSrc = OtherMI->getOperand(1).getReg(); 1202 if (!TargetRegisterInfo::isVirtualRegister(OtherSrc)) 1203 return false; 1204 1205 if (Src != OtherSrc) 1206 return false; 1207 1208 // If the copies use two different value numbers of X, we cannot merge 1209 // A and B. 1210 LiveInterval &SrcInt = li.getInterval(Src); 1211 // getVNInfoBefore returns NULL for undef copies. In this case, the 1212 // optimization is still safe. 1213 if (SrcInt.getVNInfoBefore(OtherVNI->def) != 1214 SrcInt.getVNInfoBefore(VNI->def)) 1215 return false; 1216 1217 DupCopies.push_back(MI); 1218 return true; 1219 } 1220 } 1221 1222 /// joinIntervals - Attempt to join these two intervals. On failure, this 1223 /// returns false. 1224 bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { 1225 // Handle physreg joins separately. 1226 if (CP.isPhys()) 1227 return joinReservedPhysReg(CP); 1228 1229 LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); 1230 DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS 1231 << '\n'); 1232 1233 // Compute the final value assignment, assuming that the live ranges can be 1234 // coalesced. 
1235 SmallVector<int, 16> LHSValNoAssignments; 1236 SmallVector<int, 16> RHSValNoAssignments; 1237 DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS; 1238 DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS; 1239 SmallVector<VNInfo*, 16> NewVNInfo; 1240 1241 SmallVector<MachineInstr*, 8> DupCopies; 1242 SmallVector<MachineInstr*, 8> DeadCopies; 1243 1244 LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg()); 1245 DEBUG(dbgs() << "\t\tLHS = " << PrintReg(CP.getDstReg(), TRI) << ' ' << LHS 1246 << '\n'); 1247 1248 // Loop over the value numbers of the LHS, seeing if any are defined from 1249 // the RHS. 1250 for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); 1251 i != e; ++i) { 1252 VNInfo *VNI = *i; 1253 if (VNI->isUnused() || VNI->isPHIDef()) 1254 continue; 1255 MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); 1256 assert(MI && "Missing def"); 1257 if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? 1258 continue; 1259 1260 // Figure out the value # from the RHS. 1261 VNInfo *OtherVNI = RHS.getVNInfoBefore(VNI->def); 1262 // The copy could be to an aliased physreg. 1263 if (!OtherVNI) 1264 continue; 1265 1266 // DstReg is known to be a register in the LHS interval. If the src is 1267 // from the RHS interval, we can use its value #. 1268 if (CP.isCoalescable(MI)) 1269 DeadCopies.push_back(MI); 1270 else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI, 1271 DupCopies)) 1272 continue; 1273 1274 LHSValsDefinedFromRHS[VNI] = OtherVNI; 1275 } 1276 1277 // Loop over the value numbers of the RHS, seeing if any are defined from 1278 // the LHS. 1279 for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); 1280 i != e; ++i) { 1281 VNInfo *VNI = *i; 1282 if (VNI->isUnused() || VNI->isPHIDef()) 1283 continue; 1284 MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); 1285 assert(MI && "Missing def"); 1286 if (!MI->isCopyLike() && !MI->isImplicitDef()) // Src not defined by a copy? 
1287 continue; 1288 1289 // Figure out the value # from the LHS. 1290 VNInfo *OtherVNI = LHS.getVNInfoBefore(VNI->def); 1291 // The copy could be to an aliased physreg. 1292 if (!OtherVNI) 1293 continue; 1294 1295 // DstReg is known to be a register in the RHS interval. If the src is 1296 // from the LHS interval, we can use its value #. 1297 if (CP.isCoalescable(MI)) 1298 DeadCopies.push_back(MI); 1299 else if (!RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, OtherVNI, 1300 DupCopies)) 1301 continue; 1302 1303 RHSValsDefinedFromLHS[VNI] = OtherVNI; 1304 } 1305 1306 LHSValNoAssignments.resize(LHS.getNumValNums(), -1); 1307 RHSValNoAssignments.resize(RHS.getNumValNums(), -1); 1308 NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); 1309 1310 for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); 1311 i != e; ++i) { 1312 VNInfo *VNI = *i; 1313 unsigned VN = VNI->id; 1314 if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) 1315 continue; 1316 ComputeUltimateVN(VNI, NewVNInfo, 1317 LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, 1318 LHSValNoAssignments, RHSValNoAssignments); 1319 } 1320 for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); 1321 i != e; ++i) { 1322 VNInfo *VNI = *i; 1323 unsigned VN = VNI->id; 1324 if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) 1325 continue; 1326 // If this value number isn't a copy from the LHS, it's a new number. 1327 if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { 1328 NewVNInfo.push_back(VNI); 1329 RHSValNoAssignments[VN] = NewVNInfo.size()-1; 1330 continue; 1331 } 1332 1333 ComputeUltimateVN(VNI, NewVNInfo, 1334 RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, 1335 RHSValNoAssignments, LHSValNoAssignments); 1336 } 1337 1338 // Armed with the mappings of LHS/RHS values to ultimate values, walk the 1339 // interval lists to see if these intervals are coalescable. 
1340 LiveInterval::const_iterator I = LHS.begin(); 1341 LiveInterval::const_iterator IE = LHS.end(); 1342 LiveInterval::const_iterator J = RHS.begin(); 1343 LiveInterval::const_iterator JE = RHS.end(); 1344 1345 // Collect interval end points that will no longer be kills. 1346 SmallVector<MachineInstr*, 8> LHSOldKills; 1347 SmallVector<MachineInstr*, 8> RHSOldKills; 1348 1349 // Skip ahead until the first place of potential sharing. 1350 if (I != IE && J != JE) { 1351 if (I->start < J->start) { 1352 I = std::upper_bound(I, IE, J->start); 1353 if (I != LHS.begin()) --I; 1354 } else if (J->start < I->start) { 1355 J = std::upper_bound(J, JE, I->start); 1356 if (J != RHS.begin()) --J; 1357 } 1358 } 1359 1360 while (I != IE && J != JE) { 1361 // Determine if these two live ranges overlap. 1362 // If so, check value # info to determine if they are really different. 1363 if (I->end > J->start && J->end > I->start) { 1364 // If the live range overlap will map to the same value number in the 1365 // result liverange, we can still coalesce them. If not, we can't. 1366 if (LHSValNoAssignments[I->valno->id] != 1367 RHSValNoAssignments[J->valno->id]) 1368 return false; 1369 1370 // Extended live ranges should no longer be killed. 1371 if (!I->end.isBlock() && I->end < J->end) 1372 if (MachineInstr *MI = LIS->getInstructionFromIndex(I->end)) 1373 LHSOldKills.push_back(MI); 1374 if (!J->end.isBlock() && J->end < I->end) 1375 if (MachineInstr *MI = LIS->getInstructionFromIndex(J->end)) 1376 RHSOldKills.push_back(MI); 1377 } 1378 1379 if (I->end < J->end) 1380 ++I; 1381 else 1382 ++J; 1383 } 1384 1385 // Update kill info. Some live ranges are extended due to copy coalescing. 
1386 for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), 1387 E = LHSValsDefinedFromRHS.end(); I != E; ++I) { 1388 VNInfo *VNI = I->first; 1389 unsigned LHSValID = LHSValNoAssignments[VNI->id]; 1390 if (VNI->hasPHIKill()) 1391 NewVNInfo[LHSValID]->setHasPHIKill(true); 1392 } 1393 1394 // Update kill info. Some live ranges are extended due to copy coalescing. 1395 for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(), 1396 E = RHSValsDefinedFromLHS.end(); I != E; ++I) { 1397 VNInfo *VNI = I->first; 1398 unsigned RHSValID = RHSValNoAssignments[VNI->id]; 1399 if (VNI->hasPHIKill()) 1400 NewVNInfo[RHSValID]->setHasPHIKill(true); 1401 } 1402 1403 // Clear kill flags where live ranges are extended. 1404 while (!LHSOldKills.empty()) 1405 LHSOldKills.pop_back_val()->clearRegisterKills(LHS.reg, TRI); 1406 while (!RHSOldKills.empty()) 1407 RHSOldKills.pop_back_val()->clearRegisterKills(RHS.reg, TRI); 1408 1409 if (LHSValNoAssignments.empty()) 1410 LHSValNoAssignments.push_back(-1); 1411 if (RHSValNoAssignments.empty()) 1412 RHSValNoAssignments.push_back(-1); 1413 1414 // Now erase all the redundant copies. 1415 for (unsigned i = 0, e = DeadCopies.size(); i != e; ++i) { 1416 MachineInstr *MI = DeadCopies[i]; 1417 if (!ErasedInstrs.insert(MI)) 1418 continue; 1419 DEBUG(dbgs() << "\t\terased:\t" << LIS->getInstructionIndex(MI) 1420 << '\t' << *MI); 1421 LIS->RemoveMachineInstrFromMaps(MI); 1422 MI->eraseFromParent(); 1423 } 1424 1425 SmallVector<unsigned, 8> SourceRegisters; 1426 for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(), 1427 E = DupCopies.end(); I != E; ++I) { 1428 MachineInstr *MI = *I; 1429 if (!ErasedInstrs.insert(MI)) 1430 continue; 1431 1432 // If MI is a copy, then we have pretended that the assignment to B in 1433 // A = X 1434 // B = X 1435 // was actually a copy from A. 
Now that we decided to coalesce A and B, 1436 // transform the code into 1437 // A = X 1438 // In the case of the implicit_def, we just have to remove it. 1439 if (!MI->isImplicitDef()) { 1440 unsigned Src = MI->getOperand(1).getReg(); 1441 SourceRegisters.push_back(Src); 1442 } 1443 LIS->RemoveMachineInstrFromMaps(MI); 1444 MI->eraseFromParent(); 1445 } 1446 1447 // If B = X was the last use of X in a liverange, we have to shrink it now 1448 // that B = X is gone. 1449 for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(), 1450 E = SourceRegisters.end(); I != E; ++I) { 1451 LIS->shrinkToUses(&LIS->getInterval(*I)); 1452 } 1453 1454 // If we get here, we know that we can coalesce the live ranges. Ask the 1455 // intervals to coalesce themselves now. 1456 LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, 1457 MRI); 1458 return true; 1459 } 1460 1461 namespace { 1462 // DepthMBBCompare - Comparison predicate that sort first based on the loop 1463 // depth of the basic block (the unsigned), and then on the MBB number. 1464 struct DepthMBBCompare { 1465 typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair; 1466 bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const { 1467 // Deeper loops first 1468 if (LHS.first != RHS.first) 1469 return LHS.first > RHS.first; 1470 1471 // Prefer blocks that are more connected in the CFG. This takes care of 1472 // the most difficult copies first while intervals are short. 1473 unsigned cl = LHS.second->pred_size() + LHS.second->succ_size(); 1474 unsigned cr = RHS.second->pred_size() + RHS.second->succ_size(); 1475 if (cl != cr) 1476 return cl > cr; 1477 1478 // As a last resort, sort by block number. 1479 return LHS.second->getNumber() < RHS.second->getNumber(); 1480 } 1481 }; 1482 } 1483 1484 // Try joining WorkList copies starting from index From. 1485 // Null out any successful joins. 
1486 bool RegisterCoalescer::copyCoalesceWorkList(unsigned From) { 1487 assert(From <= WorkList.size() && "Out of range"); 1488 bool Progress = false; 1489 for (unsigned i = From, e = WorkList.size(); i != e; ++i) { 1490 if (!WorkList[i]) 1491 continue; 1492 // Skip instruction pointers that have already been erased, for example by 1493 // dead code elimination. 1494 if (ErasedInstrs.erase(WorkList[i])) { 1495 WorkList[i] = 0; 1496 continue; 1497 } 1498 bool Again = false; 1499 bool Success = joinCopy(WorkList[i], Again); 1500 Progress |= Success; 1501 if (Success || !Again) 1502 WorkList[i] = 0; 1503 } 1504 return Progress; 1505 } 1506 1507 void 1508 RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { 1509 DEBUG(dbgs() << MBB->getName() << ":\n"); 1510 1511 // Collect all copy-like instructions in MBB. Don't start coalescing anything 1512 // yet, it might invalidate the iterator. 1513 const unsigned PrevSize = WorkList.size(); 1514 for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); 1515 MII != E; ++MII) 1516 if (MII->isCopyLike()) 1517 WorkList.push_back(MII); 1518 1519 // Try coalescing the collected copies immediately, and remove the nulls. 1520 // This prevents the WorkList from getting too large since most copies are 1521 // joinable on the first attempt. 1522 if (copyCoalesceWorkList(PrevSize)) 1523 WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(), 1524 (MachineInstr*)0), WorkList.end()); 1525 } 1526 1527 void RegisterCoalescer::joinAllIntervals() { 1528 DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); 1529 assert(WorkList.empty() && "Old data still around."); 1530 1531 if (Loops->empty()) { 1532 // If there are no loops in the function, join intervals in function order. 1533 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); 1534 I != E; ++I) 1535 copyCoalesceInMBB(I); 1536 } else { 1537 // Otherwise, join intervals in inner loops before other intervals. 
1538 // Unfortunately we can't just iterate over loop hierarchy here because 1539 // there may be more MBB's than BB's. Collect MBB's for sorting. 1540 1541 // Join intervals in the function prolog first. We want to join physical 1542 // registers with virtual registers before the intervals got too long. 1543 std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs; 1544 for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){ 1545 MachineBasicBlock *MBB = I; 1546 MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I)); 1547 } 1548 1549 // Sort by loop depth. 1550 std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); 1551 1552 // Finally, join intervals in loop nest order. 1553 for (unsigned i = 0, e = MBBs.size(); i != e; ++i) 1554 copyCoalesceInMBB(MBBs[i].second); 1555 } 1556 1557 // Joining intervals can allow other intervals to be joined. Iteratively join 1558 // until we make no progress. 1559 while (copyCoalesceWorkList()) 1560 /* empty */ ; 1561 } 1562 1563 void RegisterCoalescer::releaseMemory() { 1564 ErasedInstrs.clear(); 1565 WorkList.clear(); 1566 DeadDefs.clear(); 1567 InflateRegs.clear(); 1568 } 1569 1570 bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { 1571 MF = &fn; 1572 MRI = &fn.getRegInfo(); 1573 TM = &fn.getTarget(); 1574 TRI = TM->getRegisterInfo(); 1575 TII = TM->getInstrInfo(); 1576 LIS = &getAnalysis<LiveIntervals>(); 1577 LDV = &getAnalysis<LiveDebugVariables>(); 1578 AA = &getAnalysis<AliasAnalysis>(); 1579 Loops = &getAnalysis<MachineLoopInfo>(); 1580 1581 DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" 1582 << "********** Function: " 1583 << ((Value*)MF->getFunction())->getName() << '\n'); 1584 1585 if (VerifyCoalescing) 1586 MF->verify(this, "Before register coalescing"); 1587 1588 RegClassInfo.runOnMachineFunction(fn); 1589 1590 // Join (coalesce) intervals if requested. 
1591 if (EnableJoining) 1592 joinAllIntervals(); 1593 1594 // After deleting a lot of copies, register classes may be less constrained. 1595 // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 -> 1596 // DPR inflation. 1597 array_pod_sort(InflateRegs.begin(), InflateRegs.end()); 1598 InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), 1599 InflateRegs.end()); 1600 DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); 1601 for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { 1602 unsigned Reg = InflateRegs[i]; 1603 if (MRI->reg_nodbg_empty(Reg)) 1604 continue; 1605 if (MRI->recomputeRegClass(Reg, *TM)) { 1606 DEBUG(dbgs() << PrintReg(Reg) << " inflated to " 1607 << MRI->getRegClass(Reg)->getName() << '\n'); 1608 ++NumInflated; 1609 } 1610 } 1611 1612 DEBUG(dump()); 1613 DEBUG(LDV->dump()); 1614 if (VerifyCoalescing) 1615 MF->verify(this, "After register coalescing"); 1616 return true; 1617 } 1618 1619 /// print - Implement the dump method. 1620 void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { 1621 LIS->print(O, m); 1622 } 1623