1 //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the generic RegisterCoalescer interface which 11 // is used as the common interface used by all clients and 12 // implementations of register coalescing. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "regalloc" 17 #include "RegisterCoalescer.h" 18 #include "LiveDebugVariables.h" 19 #include "VirtRegMap.h" 20 21 #include "llvm/Pass.h" 22 #include "llvm/Value.h" 23 #include "llvm/ADT/OwningPtr.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/ADT/SmallSet.h" 26 #include "llvm/ADT/Statistic.h" 27 #include "llvm/Analysis/AliasAnalysis.h" 28 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 29 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 30 #include "llvm/CodeGen/LiveRangeEdit.h" 31 #include "llvm/CodeGen/MachineFrameInfo.h" 32 #include "llvm/CodeGen/MachineInstr.h" 33 #include "llvm/CodeGen/MachineInstr.h" 34 #include "llvm/CodeGen/MachineLoopInfo.h" 35 #include "llvm/CodeGen/MachineRegisterInfo.h" 36 #include "llvm/CodeGen/MachineRegisterInfo.h" 37 #include "llvm/CodeGen/Passes.h" 38 #include "llvm/CodeGen/RegisterClassInfo.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include "llvm/Target/TargetInstrInfo.h" 44 #include "llvm/Target/TargetInstrInfo.h" 45 #include "llvm/Target/TargetMachine.h" 46 #include "llvm/Target/TargetOptions.h" 47 #include "llvm/Target/TargetRegisterInfo.h" 48 #include <algorithm> 49 #include <cmath> 50 using namespace llvm; 51 52 STATISTIC(numJoins , "Number of interval joins performed"); 53 STATISTIC(numCrossRCs , "Number of cross class joins performed"); 54 STATISTIC(numCommutes , "Number of instruction commuting performed"); 55 STATISTIC(numExtends , "Number of copies extended"); 56 STATISTIC(NumReMats , "Number of instructions re-materialized"); 57 STATISTIC(NumInflated , "Number of register classes inflated"); 58 STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested"); 59 STATISTIC(NumLaneResolves, "Number of dead lane conflicts resolved"); 60 61 static cl::opt<bool> 62 EnableJoining("join-liveintervals", 63 cl::desc("Coalesce copies (default=true)"), 64 cl::init(true)); 65 66 static cl::opt<bool> 67 VerifyCoalescing("verify-coalescing", 68 cl::desc("Verify machine instrs before and after register coalescing"), 69 cl::Hidden); 70 71 namespace { 72 class RegisterCoalescer : public MachineFunctionPass, 73 private LiveRangeEdit::Delegate { 74 MachineFunction* MF; 75 MachineRegisterInfo* MRI; 76 const TargetMachine* TM; 77 const TargetRegisterInfo* TRI; 78 const TargetInstrInfo* TII; 79 LiveIntervals *LIS; 80 LiveDebugVariables *LDV; 81 const MachineLoopInfo* Loops; 82 AliasAnalysis *AA; 83 RegisterClassInfo RegClassInfo; 84 85 /// WorkList - Copy instructions yet to be coalesced. 86 SmallVector<MachineInstr*, 8> WorkList; 87 88 /// ErasedInstrs - Set of instruction pointers that have been erased, and 89 /// that may be present in WorkList. 90 SmallPtrSet<MachineInstr*, 8> ErasedInstrs; 91 92 /// Dead instructions that are about to be deleted. 93 SmallVector<MachineInstr*, 8> DeadDefs; 94 95 /// Virtual registers to be considered for register class inflation. 96 SmallVector<unsigned, 8> InflateRegs; 97 98 /// Recursively eliminate dead defs in DeadDefs. 99 void eliminateDeadDefs(); 100 101 /// LiveRangeEdit callback. 102 void LRE_WillEraseInstruction(MachineInstr *MI); 103 104 /// joinAllIntervals - join compatible live intervals 105 void joinAllIntervals(); 106 107 /// copyCoalesceInMBB - Coalesce copies in the specified MBB, putting 108 /// copies that cannot yet be coalesced into WorkList. 109 void copyCoalesceInMBB(MachineBasicBlock *MBB); 110 111 /// copyCoalesceWorkList - Try to coalesce all copies in WorkList after 112 /// position From. Return true if any progress was made. 113 bool copyCoalesceWorkList(unsigned From = 0); 114 115 /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, 116 /// which are the src/dst of the copy instruction CopyMI. This returns 117 /// true if the copy was successfully coalesced away. If it is not 118 /// currently possible to coalesce this interval, but it may be possible if 119 /// other things get coalesced, then it returns true by reference in 120 /// 'Again'. 121 bool joinCopy(MachineInstr *TheCopy, bool &Again); 122 123 /// joinIntervals - Attempt to join these two intervals. On failure, this 124 /// returns false. The output "SrcInt" will not have been modified, so we 125 /// can use this information below to update aliases. 126 bool joinIntervals(CoalescerPair &CP); 127 128 /// Attempt joining two virtual registers. Return true on success. 129 bool joinVirtRegs(CoalescerPair &CP); 130 131 /// Attempt joining with a reserved physreg. 132 bool joinReservedPhysReg(CoalescerPair &CP); 133 134 /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy. If 135 /// the source value number is defined by a copy from the destination reg 136 /// see if we can merge these two destination reg valno# into a single 137 /// value number, eliminating a copy. 138 bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); 139 140 /// hasOtherReachingDefs - Return true if there are definitions of IntB 141 /// other than BValNo val# that can reach uses of AValno val# of IntA. 142 bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, 143 VNInfo *AValNo, VNInfo *BValNo); 144 145 /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy. 146 /// If the source value number is defined by a commutable instruction and 147 /// its other operand is coalesced to the copy dest register, see if we 148 /// can transform the copy into a noop by commuting the definition. 149 bool removeCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); 150 151 /// reMaterializeTrivialDef - If the source of a copy is defined by a 152 /// trivial computation, replace the copy by rematerialize the definition. 153 bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, 154 MachineInstr *CopyMI); 155 156 /// canJoinPhys - Return true if a physreg copy should be joined. 157 bool canJoinPhys(CoalescerPair &CP); 158 159 /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and 160 /// update the subregister number if it is not zero. If DstReg is a 161 /// physical register and the existing subregister number of the def / use 162 /// being updated is not zero, make sure to set it to the correct physical 163 /// subregister. 164 void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); 165 166 /// eliminateUndefCopy - Handle copies of undef values. 167 bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP); 168 169 public: 170 static char ID; // Class identification, replacement for typeinfo 171 RegisterCoalescer() : MachineFunctionPass(ID) { 172 initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); 173 } 174 175 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 176 177 virtual void releaseMemory(); 178 179 /// runOnMachineFunction - pass entry point 180 virtual bool runOnMachineFunction(MachineFunction&); 181 182 /// print - Implement the dump method. 183 virtual void print(raw_ostream &O, const Module* = 0) const; 184 }; 185 } /// end anonymous namespace 186 187 char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; 188 189 INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", 190 "Simple Register Coalescing", false, false) 191 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 192 INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) 193 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 194 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 195 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 196 INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", 197 "Simple Register Coalescing", false, false) 198 199 char RegisterCoalescer::ID = 0; 200 201 static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, 202 unsigned &Src, unsigned &Dst, 203 unsigned &SrcSub, unsigned &DstSub) { 204 if (MI->isCopy()) { 205 Dst = MI->getOperand(0).getReg(); 206 DstSub = MI->getOperand(0).getSubReg(); 207 Src = MI->getOperand(1).getReg(); 208 SrcSub = MI->getOperand(1).getSubReg(); 209 } else if (MI->isSubregToReg()) { 210 Dst = MI->getOperand(0).getReg(); 211 DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(), 212 MI->getOperand(3).getImm()); 213 Src = MI->getOperand(2).getReg(); 214 SrcSub = MI->getOperand(2).getSubReg(); 215 } else 216 return false; 217 return true; 218 } 219 220 bool CoalescerPair::setRegisters(const MachineInstr *MI) { 221 SrcReg = DstReg = 0; 222 SrcIdx = DstIdx = 0; 223 NewRC = 0; 224 Flipped = CrossClass = false; 225 226 unsigned Src, Dst, SrcSub, DstSub; 227 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) 228 return false; 229 Partial = SrcSub || DstSub; 230 231 // If one register is a physreg, it must be Dst. 232 if (TargetRegisterInfo::isPhysicalRegister(Src)) { 233 if (TargetRegisterInfo::isPhysicalRegister(Dst)) 234 return false; 235 std::swap(Src, Dst); 236 std::swap(SrcSub, DstSub); 237 Flipped = true; 238 } 239 240 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 241 242 if (TargetRegisterInfo::isPhysicalRegister(Dst)) { 243 // Eliminate DstSub on a physreg. 244 if (DstSub) { 245 Dst = TRI.getSubReg(Dst, DstSub); 246 if (!Dst) return false; 247 DstSub = 0; 248 } 249 250 // Eliminate SrcSub by picking a corresponding Dst superregister. 251 if (SrcSub) { 252 Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); 253 if (!Dst) return false; 254 SrcSub = 0; 255 } else if (!MRI.getRegClass(Src)->contains(Dst)) { 256 return false; 257 } 258 } else { 259 // Both registers are virtual. 260 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); 261 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 262 263 // Both registers have subreg indices. 264 if (SrcSub && DstSub) { 265 // Copies between different sub-registers are never coalescable. 266 if (Src == Dst && SrcSub != DstSub) 267 return false; 268 269 NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub, 270 SrcIdx, DstIdx); 271 if (!NewRC) 272 return false; 273 } else if (DstSub) { 274 // SrcReg will be merged with a sub-register of DstReg. 275 SrcIdx = DstSub; 276 NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); 277 } else if (SrcSub) { 278 // DstReg will be merged with a sub-register of SrcReg. 279 DstIdx = SrcSub; 280 NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub); 281 } else { 282 // This is a straight copy without sub-registers. 283 NewRC = TRI.getCommonSubClass(DstRC, SrcRC); 284 } 285 286 // The combined constraint may be impossible to satisfy. 287 if (!NewRC) 288 return false; 289 290 // Prefer SrcReg to be a sub-register of DstReg. 291 // FIXME: Coalescer should support subregs symmetrically. 292 if (DstIdx && !SrcIdx) { 293 std::swap(Src, Dst); 294 std::swap(SrcIdx, DstIdx); 295 Flipped = !Flipped; 296 } 297 298 CrossClass = NewRC != DstRC || NewRC != SrcRC; 299 } 300 // Check our invariants 301 assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); 302 assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && 303 "Cannot have a physical SubIdx"); 304 SrcReg = Src; 305 DstReg = Dst; 306 return true; 307 } 308 309 bool CoalescerPair::flip() { 310 if (TargetRegisterInfo::isPhysicalRegister(DstReg)) 311 return false; 312 std::swap(SrcReg, DstReg); 313 std::swap(SrcIdx, DstIdx); 314 Flipped = !Flipped; 315 return true; 316 } 317 318 bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { 319 if (!MI) 320 return false; 321 unsigned Src, Dst, SrcSub, DstSub; 322 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) 323 return false; 324 325 // Find the virtual register that is SrcReg. 326 if (Dst == SrcReg) { 327 std::swap(Src, Dst); 328 std::swap(SrcSub, DstSub); 329 } else if (Src != SrcReg) { 330 return false; 331 } 332 333 // Now check that Dst matches DstReg. 334 if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { 335 if (!TargetRegisterInfo::isPhysicalRegister(Dst)) 336 return false; 337 assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state."); 338 // DstSub could be set for a physreg from INSERT_SUBREG. 339 if (DstSub) 340 Dst = TRI.getSubReg(Dst, DstSub); 341 // Full copy of Src. 342 if (!SrcSub) 343 return DstReg == Dst; 344 // This is a partial register copy. Check that the parts match. 345 return TRI.getSubReg(DstReg, SrcSub) == Dst; 346 } else { 347 // DstReg is virtual. 348 if (DstReg != Dst) 349 return false; 350 // Registers match, do the subregisters line up? 351 return TRI.composeSubRegIndices(SrcIdx, SrcSub) == 352 TRI.composeSubRegIndices(DstIdx, DstSub); 353 } 354 } 355 356 void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { 357 AU.setPreservesCFG(); 358 AU.addRequired<AliasAnalysis>(); 359 AU.addRequired<LiveIntervals>(); 360 AU.addPreserved<LiveIntervals>(); 361 AU.addRequired<LiveDebugVariables>(); 362 AU.addPreserved<LiveDebugVariables>(); 363 AU.addPreserved<SlotIndexes>(); 364 AU.addRequired<MachineLoopInfo>(); 365 AU.addPreserved<MachineLoopInfo>(); 366 AU.addPreservedID(MachineDominatorsID); 367 MachineFunctionPass::getAnalysisUsage(AU); 368 } 369 370 void RegisterCoalescer::eliminateDeadDefs() { 371 SmallVector<LiveInterval*, 8> NewRegs; 372 LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs); 373 } 374 375 // Callback from eliminateDeadDefs(). 376 void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) { 377 // MI may be in WorkList. Make sure we don't visit it. 378 ErasedInstrs.insert(MI); 379 } 380 381 /// adjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA 382 /// being the source and IntB being the dest, thus this defines a value number 383 /// in IntB. If the source value number (in IntA) is defined by a copy from B, 384 /// see if we can merge these two pieces of B into a single value number, 385 /// eliminating a copy. For example: 386 /// 387 /// A3 = B0 388 /// ... 389 /// B1 = A3 <- this copy 390 /// 391 /// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 392 /// value number to be replaced with B0 (which simplifies the B liveinterval). 393 /// 394 /// This returns true if an interval was modified. 395 /// 396 bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, 397 MachineInstr *CopyMI) { 398 assert(!CP.isPartial() && "This doesn't work for partial copies."); 399 assert(!CP.isPhys() && "This doesn't work for physreg copies."); 400 401 LiveInterval &IntA = 402 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 403 LiveInterval &IntB = 404 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 405 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); 406 407 // BValNo is a value number in B that is defined by a copy from A. 'B3' in 408 // the example above. 409 LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); 410 if (BLR == IntB.end()) return false; 411 VNInfo *BValNo = BLR->valno; 412 413 // Get the location that B is defined at. Two options: either this value has 414 // an unknown definition point or it is defined at CopyIdx. If unknown, we 415 // can't process it. 416 if (BValNo->def != CopyIdx) return false; 417 418 // AValNo is the value number in A that defines the copy, A3 in the example. 419 SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); 420 LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); 421 // The live range might not exist after fun with physreg coalescing. 422 if (ALR == IntA.end()) return false; 423 VNInfo *AValNo = ALR->valno; 424 425 // If AValNo is defined as a copy from IntB, we can potentially process this. 426 // Get the instruction that defines this value number. 427 MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); 428 // Don't allow any partial copies, even if isCoalescable() allows them. 429 if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy()) 430 return false; 431 432 // Get the LiveRange in IntB that this value number starts with. 433 LiveInterval::iterator ValLR = 434 IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); 435 if (ValLR == IntB.end()) 436 return false; 437 438 // Make sure that the end of the live range is inside the same block as 439 // CopyMI. 440 MachineInstr *ValLREndInst = 441 LIS->getInstructionFromIndex(ValLR->end.getPrevSlot()); 442 if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) 443 return false; 444 445 // Okay, we now know that ValLR ends in the same block that the CopyMI 446 // live-range starts. If there are no intervening live ranges between them in 447 // IntB, we can merge them. 448 if (ValLR+1 != BLR) return false; 449 450 DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI)); 451 452 SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; 453 // We are about to delete CopyMI, so need to remove it as the 'instruction 454 // that defines this value #'. Update the valnum with the new defining 455 // instruction #. 456 BValNo->def = FillerStart; 457 458 // Okay, we can merge them. We need to insert a new liverange: 459 // [ValLR.end, BLR.begin) of either value number, then we merge the 460 // two value numbers. 461 IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); 462 463 // Okay, merge "B1" into the same value number as "B0". 464 if (BValNo != ValLR->valno) 465 IntB.MergeValueNumberInto(BValNo, ValLR->valno); 466 DEBUG(dbgs() << " result = " << IntB << '\n'); 467 468 // If the source instruction was killing the source register before the 469 // merge, unset the isKill marker given the live range has been extended. 470 int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); 471 if (UIdx != -1) { 472 ValLREndInst->getOperand(UIdx).setIsKill(false); 473 } 474 475 // Rewrite the copy. If the copy instruction was killing the destination 476 // register before the merge, find the last use and trim the live range. That 477 // will also add the isKill marker. 478 CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); 479 if (ALR->end == CopyIdx) 480 LIS->shrinkToUses(&IntA); 481 482 ++numExtends; 483 return true; 484 } 485 486 /// hasOtherReachingDefs - Return true if there are definitions of IntB 487 /// other than BValNo val# that can reach uses of AValno val# of IntA. 488 bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, 489 LiveInterval &IntB, 490 VNInfo *AValNo, 491 VNInfo *BValNo) { 492 // If AValNo has PHI kills, conservatively assume that IntB defs can reach 493 // the PHI values. 494 if (LIS->hasPHIKill(IntA, AValNo)) 495 return true; 496 497 for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); 498 AI != AE; ++AI) { 499 if (AI->valno != AValNo) continue; 500 LiveInterval::Ranges::iterator BI = 501 std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); 502 if (BI != IntB.ranges.begin()) 503 --BI; 504 for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { 505 if (BI->valno == BValNo) 506 continue; 507 if (BI->start <= AI->start && BI->end > AI->start) 508 return true; 509 if (BI->start > AI->start && BI->start < AI->end) 510 return true; 511 } 512 } 513 return false; 514 } 515 516 /// removeCopyByCommutingDef - We found a non-trivially-coalescable copy with 517 /// IntA being the source and IntB being the dest, thus this defines a value 518 /// number in IntB. If the source value number (in IntA) is defined by a 519 /// commutable instruction and its other operand is coalesced to the copy dest 520 /// register, see if we can transform the copy into a noop by commuting the 521 /// definition. For example, 522 /// 523 /// A3 = op A2 B0<kill> 524 /// ... 525 /// B1 = A3 <- this copy 526 /// ... 527 /// = op A3 <- more uses 528 /// 529 /// ==> 530 /// 531 /// B2 = op B0 A2<kill> 532 /// ... 533 /// B1 = B2 <- now an identify copy 534 /// ... 535 /// = op B2 <- more uses 536 /// 537 /// This returns true if an interval was modified. 538 /// 539 bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, 540 MachineInstr *CopyMI) { 541 assert (!CP.isPhys()); 542 543 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); 544 545 LiveInterval &IntA = 546 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 547 LiveInterval &IntB = 548 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 549 550 // BValNo is a value number in B that is defined by a copy from A. 'B3' in 551 // the example above. 552 VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); 553 if (!BValNo || BValNo->def != CopyIdx) 554 return false; 555 556 assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); 557 558 // AValNo is the value number in A that defines the copy, A3 in the example. 559 VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); 560 assert(AValNo && "COPY source not live"); 561 if (AValNo->isPHIDef() || AValNo->isUnused()) 562 return false; 563 MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); 564 if (!DefMI) 565 return false; 566 if (!DefMI->isCommutable()) 567 return false; 568 // If DefMI is a two-address instruction then commuting it will change the 569 // destination register. 570 int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); 571 assert(DefIdx != -1); 572 unsigned UseOpIdx; 573 if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) 574 return false; 575 unsigned Op1, Op2, NewDstIdx; 576 if (!TII->findCommutedOpIndices(DefMI, Op1, Op2)) 577 return false; 578 if (Op1 == UseOpIdx) 579 NewDstIdx = Op2; 580 else if (Op2 == UseOpIdx) 581 NewDstIdx = Op1; 582 else 583 return false; 584 585 MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); 586 unsigned NewReg = NewDstMO.getReg(); 587 if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill()) 588 return false; 589 590 // Make sure there are no other definitions of IntB that would reach the 591 // uses which the new definition can reach. 592 if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) 593 return false; 594 595 // If some of the uses of IntA.reg is already coalesced away, return false. 596 // It's not possible to determine whether it's safe to perform the coalescing. 597 for (MachineRegisterInfo::use_nodbg_iterator UI = 598 MRI->use_nodbg_begin(IntA.reg), 599 UE = MRI->use_nodbg_end(); UI != UE; ++UI) { 600 MachineInstr *UseMI = &*UI; 601 SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); 602 LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); 603 if (ULR == IntA.end() || ULR->valno != AValNo) 604 continue; 605 // If this use is tied to a def, we can't rewrite the register. 606 if (UseMI->isRegTiedToDefOperand(UI.getOperandNo())) 607 return false; 608 } 609 610 DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t' 611 << *DefMI); 612 613 // At this point we have decided that it is legal to do this 614 // transformation. Start by commuting the instruction. 615 MachineBasicBlock *MBB = DefMI->getParent(); 616 MachineInstr *NewMI = TII->commuteInstruction(DefMI); 617 if (!NewMI) 618 return false; 619 if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && 620 TargetRegisterInfo::isVirtualRegister(IntB.reg) && 621 !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) 622 return false; 623 if (NewMI != DefMI) { 624 LIS->ReplaceMachineInstrInMaps(DefMI, NewMI); 625 MachineBasicBlock::iterator Pos = DefMI; 626 MBB->insert(Pos, NewMI); 627 MBB->erase(DefMI); 628 } 629 unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); 630 NewMI->getOperand(OpIdx).setIsKill(); 631 632 // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. 633 // A = or A, B 634 // ... 635 // B = A 636 // ... 637 // C = A<kill> 638 // ... 639 // = B 640 641 // Update uses of IntA of the specific Val# with IntB. 642 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), 643 UE = MRI->use_end(); UI != UE;) { 644 MachineOperand &UseMO = UI.getOperand(); 645 MachineInstr *UseMI = &*UI; 646 ++UI; 647 if (UseMI->isDebugValue()) { 648 // FIXME These don't have an instruction index. Not clear we have enough 649 // info to decide whether to do this replacement or not. For now do it. 650 UseMO.setReg(NewReg); 651 continue; 652 } 653 SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); 654 LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); 655 if (ULR == IntA.end() || ULR->valno != AValNo) 656 continue; 657 // Kill flags are no longer accurate. They are recomputed after RA. 658 UseMO.setIsKill(false); 659 if (TargetRegisterInfo::isPhysicalRegister(NewReg)) 660 UseMO.substPhysReg(NewReg, *TRI); 661 else 662 UseMO.setReg(NewReg); 663 if (UseMI == CopyMI) 664 continue; 665 if (!UseMI->isCopy()) 666 continue; 667 if (UseMI->getOperand(0).getReg() != IntB.reg || 668 UseMI->getOperand(0).getSubReg()) 669 continue; 670 671 // This copy will become a noop. If it's defining a new val#, merge it into 672 // BValNo. 673 SlotIndex DefIdx = UseIdx.getRegSlot(); 674 VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); 675 if (!DVNI) 676 continue; 677 DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); 678 assert(DVNI->def == DefIdx); 679 BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); 680 ErasedInstrs.insert(UseMI); 681 LIS->RemoveMachineInstrFromMaps(UseMI); 682 UseMI->eraseFromParent(); 683 } 684 685 // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition 686 // is updated. 687 VNInfo *ValNo = BValNo; 688 ValNo->def = AValNo->def; 689 for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); 690 AI != AE; ++AI) { 691 if (AI->valno != AValNo) continue; 692 IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); 693 } 694 DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); 695 696 IntA.removeValNo(AValNo); 697 DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); 698 ++numCommutes; 699 return true; 700 } 701 702 /// reMaterializeTrivialDef - If the source of a copy is defined by a trivial 703 /// computation, replace the copy by rematerialize the definition. 704 bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, 705 unsigned DstReg, 706 MachineInstr *CopyMI) { 707 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); 708 LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); 709 assert(SrcLR != SrcInt.end() && "Live range not found!"); 710 VNInfo *ValNo = SrcLR->valno; 711 if (ValNo->isPHIDef() || ValNo->isUnused()) 712 return false; 713 MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); 714 if (!DefMI) 715 return false; 716 assert(DefMI && "Defining instruction disappeared"); 717 if (!DefMI->isAsCheapAsAMove()) 718 return false; 719 if (!TII->isTriviallyReMaterializable(DefMI, AA)) 720 return false; 721 bool SawStore = false; 722 if (!DefMI->isSafeToMove(TII, AA, SawStore)) 723 return false; 724 const MCInstrDesc &MCID = DefMI->getDesc(); 725 if (MCID.getNumDefs() != 1) 726 return false; 727 if (!DefMI->isImplicitDef()) { 728 // Make sure the copy destination register class fits the instruction 729 // definition register class. The mismatch can happen as a result of earlier 730 // extract_subreg, insert_subreg, subreg_to_reg coalescing. 731 const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF); 732 if (TargetRegisterInfo::isVirtualRegister(DstReg)) { 733 if (MRI->getRegClass(DstReg) != RC) 734 return false; 735 } else if (!RC->contains(DstReg)) 736 return false; 737 } 738 739 MachineBasicBlock *MBB = CopyMI->getParent(); 740 MachineBasicBlock::iterator MII = 741 llvm::next(MachineBasicBlock::iterator(CopyMI)); 742 TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); 743 MachineInstr *NewMI = prior(MII); 744 745 // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). 746 // We need to remember these so we can add intervals once we insert 747 // NewMI into SlotIndexes. 748 SmallVector<unsigned, 4> NewMIImplDefs; 749 for (unsigned i = NewMI->getDesc().getNumOperands(), 750 e = NewMI->getNumOperands(); i != e; ++i) { 751 MachineOperand &MO = NewMI->getOperand(i); 752 if (MO.isReg()) { 753 assert(MO.isDef() && MO.isImplicit() && MO.isDead() && 754 TargetRegisterInfo::isPhysicalRegister(MO.getReg())); 755 NewMIImplDefs.push_back(MO.getReg()); 756 } 757 } 758 759 // CopyMI may have implicit operands, transfer them over to the newly 760 // rematerialized instruction. And update implicit def interval valnos. 761 for (unsigned i = CopyMI->getDesc().getNumOperands(), 762 e = CopyMI->getNumOperands(); i != e; ++i) { 763 MachineOperand &MO = CopyMI->getOperand(i); 764 if (MO.isReg()) { 765 assert(MO.isImplicit() && "No explicit operands after implict operands."); 766 // Discard VReg implicit defs. 767 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 768 NewMI->addOperand(MO); 769 } 770 } 771 } 772 773 LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); 774 775 SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); 776 for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { 777 unsigned Reg = NewMIImplDefs[i]; 778 for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) 779 if (LiveInterval *LI = LIS->getCachedRegUnit(*Units)) 780 LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); 781 } 782 783 CopyMI->eraseFromParent(); 784 ErasedInstrs.insert(CopyMI); 785 DEBUG(dbgs() << "Remat: " << *NewMI); 786 ++NumReMats; 787 788 // The source interval can become smaller because we removed a use. 789 LIS->shrinkToUses(&SrcInt, &DeadDefs); 790 if (!DeadDefs.empty()) 791 eliminateDeadDefs(); 792 793 return true; 794 } 795 796 /// eliminateUndefCopy - ProcessImpicitDefs may leave some copies of <undef> 797 /// values, it only removes local variables. When we have a copy like: 798 /// 799 /// %vreg1 = COPY %vreg2<undef> 800 /// 801 /// We delete the copy and remove the corresponding value number from %vreg1. 802 /// Any uses of that value number are marked as <undef>. 803 bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, 804 const CoalescerPair &CP) { 805 SlotIndex Idx = LIS->getInstructionIndex(CopyMI); 806 LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg()); 807 if (SrcInt->liveAt(Idx)) 808 return false; 809 LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg()); 810 if (DstInt->liveAt(Idx)) 811 return false; 812 813 // No intervals are live-in to CopyMI - it is undef. 814 if (CP.isFlipped()) 815 DstInt = SrcInt; 816 SrcInt = 0; 817 818 VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot()); 819 assert(DeadVNI && "No value defined in DstInt"); 820 DstInt->removeValNo(DeadVNI); 821 822 // Find new undef uses. 823 for (MachineRegisterInfo::reg_nodbg_iterator 824 I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end(); 825 I != E; ++I) { 826 MachineOperand &MO = I.getOperand(); 827 if (MO.isDef() || MO.isUndef()) 828 continue; 829 MachineInstr *MI = MO.getParent(); 830 SlotIndex Idx = LIS->getInstructionIndex(MI); 831 if (DstInt->liveAt(Idx)) 832 continue; 833 MO.setIsUndef(true); 834 DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI); 835 } 836 return true; 837 } 838 839 /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and 840 /// update the subregister number if it is not zero. If DstReg is a 841 /// physical register and the existing subregister number of the def / use 842 /// being updated is not zero, make sure to set it to the correct physical 843 /// subregister. 844 void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, 845 unsigned DstReg, 846 unsigned SubIdx) { 847 bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); 848 LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg); 849 850 // Update LiveDebugVariables. 851 LDV->renameRegister(SrcReg, DstReg, SubIdx); 852 853 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); 854 MachineInstr *UseMI = I.skipInstruction();) { 855 SmallVector<unsigned,8> Ops; 856 bool Reads, Writes; 857 tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); 858 859 // If SrcReg wasn't read, it may still be the case that DstReg is live-in 860 // because SrcReg is a sub-register. 861 if (DstInt && !Reads && SubIdx) 862 Reads = DstInt->liveAt(LIS->getInstructionIndex(UseMI)); 863 864 // Replace SrcReg with DstReg in all UseMI operands. 865 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 866 MachineOperand &MO = UseMI->getOperand(Ops[i]); 867 868 // Adjust <undef> flags in case of sub-register joins. We don't want to 869 // turn a full def into a read-modify-write sub-register def and vice 870 // versa. 871 if (SubIdx && MO.isDef()) 872 MO.setIsUndef(!Reads); 873 874 if (DstIsPhys) 875 MO.substPhysReg(DstReg, *TRI); 876 else 877 MO.substVirtReg(DstReg, SubIdx, *TRI); 878 } 879 880 DEBUG({ 881 dbgs() << "\t\tupdated: "; 882 if (!UseMI->isDebugValue()) 883 dbgs() << LIS->getInstructionIndex(UseMI) << "\t"; 884 dbgs() << *UseMI; 885 }); 886 } 887 } 888 889 /// canJoinPhys - Return true if a copy involving a physreg should be joined. 890 bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) { 891 /// Always join simple intervals that are defined by a single copy from a 892 /// reserved register. This doesn't increase register pressure, so it is 893 /// always beneficial. 894 if (!MRI->isReserved(CP.getDstReg())) { 895 DEBUG(dbgs() << "\tCan only merge into reserved registers.\n"); 896 return false; 897 } 898 899 LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); 900 if (CP.isFlipped() && JoinVInt.containsOneValue()) 901 return true; 902 903 DEBUG(dbgs() << "\tCannot join defs into reserved register.\n"); 904 return false; 905 } 906 907 /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, 908 /// which are the src/dst of the copy instruction CopyMI. This returns true 909 /// if the copy was successfully coalesced away. If it is not currently 910 /// possible to coalesce this interval, but it may be possible if other 911 /// things get coalesced, then it returns true by reference in 'Again'. 912 bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { 913 914 Again = false; 915 DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI); 916 917 CoalescerPair CP(*TRI); 918 if (!CP.setRegisters(CopyMI)) { 919 DEBUG(dbgs() << "\tNot coalescable.\n"); 920 return false; 921 } 922 923 // Dead code elimination. This really should be handled by MachineDCE, but 924 // sometimes dead copies slip through, and we can't generate invalid live 925 // ranges. 926 if (!CP.isPhys() && CopyMI->allDefsAreDead()) { 927 DEBUG(dbgs() << "\tCopy is dead.\n"); 928 DeadDefs.push_back(CopyMI); 929 eliminateDeadDefs(); 930 return true; 931 } 932 933 // Eliminate undefs. 934 if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) { 935 DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n"); 936 LIS->RemoveMachineInstrFromMaps(CopyMI); 937 CopyMI->eraseFromParent(); 938 return false; // Not coalescable. 939 } 940 941 // Coalesced copies are normally removed immediately, but transformations 942 // like removeCopyByCommutingDef() can inadvertently create identity copies. 943 // When that happens, just join the values and remove the copy. 944 if (CP.getSrcReg() == CP.getDstReg()) { 945 LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); 946 DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); 947 LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI)); 948 if (VNInfo *DefVNI = LRQ.valueDefined()) { 949 VNInfo *ReadVNI = LRQ.valueIn(); 950 assert(ReadVNI && "No value before copy and no <undef> flag."); 951 assert(ReadVNI != DefVNI && "Cannot read and define the same value."); 952 LI.MergeValueNumberInto(DefVNI, ReadVNI); 953 DEBUG(dbgs() << "\tMerged values: " << LI << '\n'); 954 } 955 LIS->RemoveMachineInstrFromMaps(CopyMI); 956 CopyMI->eraseFromParent(); 957 return true; 958 } 959 960 // Enforce policies. 961 if (CP.isPhys()) { 962 DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) 963 << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) 964 << '\n'); 965 if (!canJoinPhys(CP)) { 966 // Before giving up coalescing, if definition of source is defined by 967 // trivial computation, try rematerializing it. 968 if (!CP.isFlipped() && 969 reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), 970 CP.getDstReg(), CopyMI)) 971 return true; 972 return false; 973 } 974 } else { 975 DEBUG({ 976 dbgs() << "\tConsidering merging to " << CP.getNewRC()->getName() 977 << " with "; 978 if (CP.getDstIdx() && CP.getSrcIdx()) 979 dbgs() << PrintReg(CP.getDstReg()) << " in " 980 << TRI->getSubRegIndexName(CP.getDstIdx()) << " and " 981 << PrintReg(CP.getSrcReg()) << " in " 982 << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n'; 983 else 984 dbgs() << PrintReg(CP.getSrcReg(), TRI) << " in " 985 << PrintReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n'; 986 }); 987 988 // When possible, let DstReg be the larger interval. 989 if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() > 990 LIS->getInterval(CP.getDstReg()).ranges.size()) 991 CP.flip(); 992 } 993 994 // Okay, attempt to join these two intervals. On failure, this returns false. 995 // Otherwise, if one of the intervals being joined is a physreg, this method 996 // always canonicalizes DstInt to be it. The output "SrcInt" will not have 997 // been modified, so we can use this information below to update aliases. 998 if (!joinIntervals(CP)) { 999 // Coalescing failed. 1000 1001 // If definition of source is defined by trivial computation, try 1002 // rematerializing it. 1003 if (!CP.isFlipped() && 1004 reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), 1005 CP.getDstReg(), CopyMI)) 1006 return true; 1007 1008 // If we can eliminate the copy without merging the live ranges, do so now. 1009 if (!CP.isPartial() && !CP.isPhys()) { 1010 if (adjustCopiesBackFrom(CP, CopyMI) || 1011 removeCopyByCommutingDef(CP, CopyMI)) { 1012 LIS->RemoveMachineInstrFromMaps(CopyMI); 1013 CopyMI->eraseFromParent(); 1014 DEBUG(dbgs() << "\tTrivial!\n"); 1015 return true; 1016 } 1017 } 1018 1019 // Otherwise, we are unable to join the intervals. 1020 DEBUG(dbgs() << "\tInterference!\n"); 1021 Again = true; // May be possible to coalesce later. 1022 return false; 1023 } 1024 1025 // Coalescing to a virtual register that is of a sub-register class of the 1026 // other. Make sure the resulting register is set to the right register class. 1027 if (CP.isCrossClass()) { 1028 ++numCrossRCs; 1029 MRI->setRegClass(CP.getDstReg(), CP.getNewRC()); 1030 } 1031 1032 // Removing sub-register copies can ease the register class constraints. 1033 // Make sure we attempt to inflate the register class of DstReg. 1034 if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC())) 1035 InflateRegs.push_back(CP.getDstReg()); 1036 1037 // CopyMI has been erased by joinIntervals at this point. Remove it from 1038 // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back 1039 // to the work list. This keeps ErasedInstrs from growing needlessly. 1040 ErasedInstrs.erase(CopyMI); 1041 1042 // Rewrite all SrcReg operands to DstReg. 1043 // Also update DstReg operands to include DstIdx if it is set. 1044 if (CP.getDstIdx()) 1045 updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx()); 1046 updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx()); 1047 1048 // SrcReg is guaranteed to be the register whose live interval that is 1049 // being merged. 1050 LIS->removeInterval(CP.getSrcReg()); 1051 1052 // Update regalloc hint. 1053 TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); 1054 1055 DEBUG({ 1056 dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI); 1057 if (!CP.isPhys()) 1058 dbgs() << LIS->getInterval(CP.getDstReg()); 1059 dbgs() << '\n'; 1060 }); 1061 1062 ++numJoins; 1063 return true; 1064 } 1065 1066 /// Attempt joining with a reserved physreg. 1067 bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { 1068 assert(CP.isPhys() && "Must be a physreg copy"); 1069 assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register"); 1070 LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); 1071 DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS 1072 << '\n'); 1073 1074 assert(CP.isFlipped() && RHS.containsOneValue() && 1075 "Invalid join with reserved register"); 1076 1077 // Optimization for reserved registers like ESP. We can only merge with a 1078 // reserved physreg if RHS has a single value that is a copy of CP.DstReg(). 1079 // The live range of the reserved register will look like a set of dead defs 1080 // - we don't properly track the live range of reserved registers. 1081 1082 // Deny any overlapping intervals. This depends on all the reserved 1083 // register live ranges to look like dead defs. 1084 for (MCRegUnitIterator UI(CP.getDstReg(), TRI); UI.isValid(); ++UI) 1085 if (RHS.overlaps(LIS->getRegUnit(*UI))) { 1086 DEBUG(dbgs() << "\t\tInterference: " << PrintRegUnit(*UI, TRI) << '\n'); 1087 return false; 1088 } 1089 1090 // Skip any value computations, we are not adding new values to the 1091 // reserved register. Also skip merging the live ranges, the reserved 1092 // register live range doesn't need to be accurate as long as all the 1093 // defs are there. 1094 1095 // Delete the identity copy. 1096 MachineInstr *CopyMI = MRI->getVRegDef(RHS.reg); 1097 LIS->RemoveMachineInstrFromMaps(CopyMI); 1098 CopyMI->eraseFromParent(); 1099 1100 // We don't track kills for reserved registers. 1101 MRI->clearKillFlags(CP.getSrcReg()); 1102 1103 return true; 1104 } 1105 1106 //===----------------------------------------------------------------------===// 1107 // Interference checking and interval joining 1108 //===----------------------------------------------------------------------===// 1109 // 1110 // In the easiest case, the two live ranges being joined are disjoint, and 1111 // there is no interference to consider. It is quite common, though, to have 1112 // overlapping live ranges, and we need to check if the interference can be 1113 // resolved. 1114 // 1115 // The live range of a single SSA value forms a sub-tree of the dominator tree. 1116 // This means that two SSA values overlap if and only if the def of one value 1117 // is contained in the live range of the other value. As a special case, the 1118 // overlapping values can be defined at the same index. 1119 // 1120 // The interference from an overlapping def can be resolved in these cases: 1121 // 1122 // 1. Coalescable copies. The value is defined by a copy that would become an 1123 // identity copy after joining SrcReg and DstReg. The copy instruction will 1124 // be removed, and the value will be merged with the source value. 1125 // 1126 // There can be several copies back and forth, causing many values to be 1127 // merged into one. We compute a list of ultimate values in the joined live 1128 // range as well as a mappings from the old value numbers. 1129 // 1130 // 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI 1131 // predecessors have a live out value. It doesn't cause real interference, 1132 // and can be merged into the value it overlaps. Like a coalescable copy, it 1133 // can be erased after joining. 1134 // 1135 // 3. Copy of external value. The overlapping def may be a copy of a value that 1136 // is already in the other register. This is like a coalescable copy, but 1137 // the live range of the source register must be trimmed after erasing the 1138 // copy instruction: 1139 // 1140 // %src = COPY %ext 1141 // %dst = COPY %ext <-- Remove this COPY, trim the live range of %ext. 1142 // 1143 // 4. Clobbering undefined lanes. Vector registers are sometimes built by 1144 // defining one lane at a time: 1145 // 1146 // %dst:ssub0<def,read-undef> = FOO 1147 // %src = BAR 1148 // %dst:ssub1<def> = COPY %src 1149 // 1150 // The live range of %src overlaps the %dst value defined by FOO, but 1151 // merging %src into %dst:ssub1 is only going to clobber the ssub1 lane 1152 // which was undef anyway. 1153 // 1154 // The value mapping is more complicated in this case. The final live range 1155 // will have different value numbers for both FOO and BAR, but there is no 1156 // simple mapping from old to new values. It may even be necessary to add 1157 // new PHI values. 1158 // 1159 // 5. Clobbering dead lanes. A def may clobber a lane of a vector register that 1160 // is live, but never read. This can happen because we don't compute 1161 // individual live ranges per lane. 1162 // 1163 // %dst<def> = FOO 1164 // %src = BAR 1165 // %dst:ssub1<def> = COPY %src 1166 // 1167 // This kind of interference is only resolved locally. If the clobbered 1168 // lane value escapes the block, the join is aborted. 1169 1170 namespace { 1171 /// Track information about values in a single virtual register about to be 1172 /// joined. Objects of this class are always created in pairs - one for each 1173 /// side of the CoalescerPair. 1174 class JoinVals { 1175 LiveInterval &LI; 1176 1177 // Location of this register in the final joined register. 1178 // Either CP.DstIdx or CP.SrcIdx. 1179 unsigned SubIdx; 1180 1181 // Values that will be present in the final live range. 1182 SmallVectorImpl<VNInfo*> &NewVNInfo; 1183 1184 const CoalescerPair &CP; 1185 LiveIntervals *LIS; 1186 SlotIndexes *Indexes; 1187 const TargetRegisterInfo *TRI; 1188 1189 // Value number assignments. Maps value numbers in LI to entries in NewVNInfo. 1190 // This is suitable for passing to LiveInterval::join(). 1191 SmallVector<int, 8> Assignments; 1192 1193 // Conflict resolution for overlapping values. 1194 enum ConflictResolution { 1195 // No overlap, simply keep this value. 1196 CR_Keep, 1197 1198 // Merge this value into OtherVNI and erase the defining instruction. 1199 // Used for IMPLICIT_DEF, coalescable copies, and copies from external 1200 // values. 1201 CR_Erase, 1202 1203 // Merge this value into OtherVNI but keep the defining instruction. 1204 // This is for the special case where OtherVNI is defined by the same 1205 // instruction. 1206 CR_Merge, 1207 1208 // Keep this value, and have it replace OtherVNI where possible. This 1209 // complicates value mapping since OtherVNI maps to two different values 1210 // before and after this def. 1211 // Used when clobbering undefined or dead lanes. 1212 CR_Replace, 1213 1214 // Unresolved conflict. Visit later when all values have been mapped. 1215 CR_Unresolved, 1216 1217 // Unresolvable conflict. Abort the join. 1218 CR_Impossible 1219 }; 1220 1221 // Per-value info for LI. The lane bit masks are all relative to the final 1222 // joined register, so they can be compared directly between SrcReg and 1223 // DstReg. 1224 struct Val { 1225 ConflictResolution Resolution; 1226 1227 // Lanes written by this def, 0 for unanalyzed values. 1228 unsigned WriteLanes; 1229 1230 // Lanes with defined values in this register. Other lanes are undef and 1231 // safe to clobber. 1232 unsigned ValidLanes; 1233 1234 // Value in LI being redefined by this def. 1235 VNInfo *RedefVNI; 1236 1237 // Value in the other live range that overlaps this def, if any. 1238 VNInfo *OtherVNI; 1239 1240 // Is this value an IMPLICIT_DEF? 1241 bool IsImplicitDef; 1242 1243 // True when the live range of this value will be pruned because of an 1244 // overlapping CR_Replace value in the other live range. 1245 bool Pruned; 1246 1247 // True once Pruned above has been computed. 1248 bool PrunedComputed; 1249 1250 Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), 1251 RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false), 1252 PrunedComputed(false) {} 1253 1254 bool isAnalyzed() const { return WriteLanes != 0; } 1255 }; 1256 1257 // One entry per value number in LI. 1258 SmallVector<Val, 8> Vals; 1259 1260 unsigned computeWriteLanes(const MachineInstr *DefMI, bool &Redef); 1261 VNInfo *stripCopies(VNInfo *VNI); 1262 ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other); 1263 void computeAssignment(unsigned ValNo, JoinVals &Other); 1264 bool taintExtent(unsigned, unsigned, JoinVals&, 1265 SmallVectorImpl<std::pair<SlotIndex, unsigned> >&); 1266 bool usesLanes(MachineInstr *MI, unsigned, unsigned, unsigned); 1267 bool isPrunedValue(unsigned ValNo, JoinVals &Other); 1268 1269 public: 1270 JoinVals(LiveInterval &li, unsigned subIdx, 1271 SmallVectorImpl<VNInfo*> &newVNInfo, 1272 const CoalescerPair &cp, 1273 LiveIntervals *lis, 1274 const TargetRegisterInfo *tri) 1275 : LI(li), SubIdx(subIdx), NewVNInfo(newVNInfo), CP(cp), LIS(lis), 1276 Indexes(LIS->getSlotIndexes()), TRI(tri), 1277 Assignments(LI.getNumValNums(), -1), Vals(LI.getNumValNums()) 1278 {} 1279 1280 /// Analyze defs in LI and compute a value mapping in NewVNInfo. 1281 /// Returns false if any conflicts were impossible to resolve. 1282 bool mapValues(JoinVals &Other); 1283 1284 /// Try to resolve conflicts that require all values to be mapped. 1285 /// Returns false if any conflicts were impossible to resolve. 1286 bool resolveConflicts(JoinVals &Other); 1287 1288 /// Prune the live range of values in Other.LI where they would conflict with 1289 /// CR_Replace values in LI. Collect end points for restoring the live range 1290 /// after joining. 1291 void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints); 1292 1293 /// Erase any machine instructions that have been coalesced away. 1294 /// Add erased instructions to ErasedInstrs. 1295 /// Add foreign virtual registers to ShrinkRegs if their live range ended at 1296 /// the erased instrs. 1297 void eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs, 1298 SmallVectorImpl<unsigned> &ShrinkRegs); 1299 1300 /// Get the value assignments suitable for passing to LiveInterval::join. 1301 const int *getAssignments() const { return Assignments.data(); } 1302 }; 1303 } // end anonymous namespace 1304 1305 /// Compute the bitmask of lanes actually written by DefMI. 1306 /// Set Redef if there are any partial register definitions that depend on the 1307 /// previous value of the register. 1308 unsigned JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef) { 1309 unsigned L = 0; 1310 for (ConstMIOperands MO(DefMI); MO.isValid(); ++MO) { 1311 if (!MO->isReg() || MO->getReg() != LI.reg || !MO->isDef()) 1312 continue; 1313 L |= TRI->getSubRegIndexLaneMask( 1314 TRI->composeSubRegIndices(SubIdx, MO->getSubReg())); 1315 if (MO->readsReg()) 1316 Redef = true; 1317 } 1318 return L; 1319 } 1320 1321 /// Find the ultimate value that VNI was copied from. 1322 VNInfo *JoinVals::stripCopies(VNInfo *VNI) { 1323 while (!VNI->isPHIDef()) { 1324 MachineInstr *MI = Indexes->getInstructionFromIndex(VNI->def); 1325 assert(MI && "No defining instruction"); 1326 if (!MI->isFullCopy()) 1327 break; 1328 unsigned Reg = MI->getOperand(1).getReg(); 1329 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1330 break; 1331 LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def); 1332 if (!LRQ.valueIn()) 1333 break; 1334 VNI = LRQ.valueIn(); 1335 } 1336 return VNI; 1337 } 1338 1339 /// Analyze ValNo in this live range, and set all fields of Vals[ValNo]. 1340 /// Return a conflict resolution when possible, but leave the hard cases as 1341 /// CR_Unresolved. 1342 /// Recursively calls computeAssignment() on this and Other, guaranteeing that 1343 /// both OtherVNI and RedefVNI have been analyzed and mapped before returning. 1344 /// The recursion always goes upwards in the dominator tree, making loops 1345 /// impossible. 1346 JoinVals::ConflictResolution 1347 JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { 1348 Val &V = Vals[ValNo]; 1349 assert(!V.isAnalyzed() && "Value has already been analyzed!"); 1350 VNInfo *VNI = LI.getValNumInfo(ValNo); 1351 if (VNI->isUnused()) { 1352 V.WriteLanes = ~0u; 1353 return CR_Keep; 1354 } 1355 1356 // Get the instruction defining this value, compute the lanes written. 1357 const MachineInstr *DefMI = 0; 1358 if (VNI->isPHIDef()) { 1359 // Conservatively assume that all lanes in a PHI are valid. 1360 V.ValidLanes = V.WriteLanes = TRI->getSubRegIndexLaneMask(SubIdx); 1361 } else { 1362 DefMI = Indexes->getInstructionFromIndex(VNI->def); 1363 bool Redef = false; 1364 V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef); 1365 1366 // If this is a read-modify-write instruction, there may be more valid 1367 // lanes than the ones written by this instruction. 1368 // This only covers partial redef operands. DefMI may have normal use 1369 // operands reading the register. They don't contribute valid lanes. 1370 // 1371 // This adds ssub1 to the set of valid lanes in %src: 1372 // 1373 // %src:ssub1<def> = FOO 1374 // 1375 // This leaves only ssub1 valid, making any other lanes undef: 1376 // 1377 // %src:ssub1<def,read-undef> = FOO %src:ssub2 1378 // 1379 // The <read-undef> flag on the def operand means that old lane values are 1380 // not important. 1381 if (Redef) { 1382 V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn(); 1383 assert(V.RedefVNI && "Instruction is reading nonexistent value"); 1384 computeAssignment(V.RedefVNI->id, Other); 1385 V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; 1386 } 1387 1388 // An IMPLICIT_DEF writes undef values. 1389 if (DefMI->isImplicitDef()) { 1390 V.IsImplicitDef = true; 1391 V.ValidLanes &= ~V.WriteLanes; 1392 } 1393 } 1394 1395 // Find the value in Other that overlaps VNI->def, if any. 1396 LiveRangeQuery OtherLRQ(Other.LI, VNI->def); 1397 1398 // It is possible that both values are defined by the same instruction, or 1399 // the values are PHIs defined in the same block. When that happens, the two 1400 // values should be merged into one, but not into any preceding value. 1401 // The first value defined or visited gets CR_Keep, the other gets CR_Merge. 1402 if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) { 1403 assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ"); 1404 1405 // One value stays, the other is merged. Keep the earlier one, or the first 1406 // one we see. 1407 if (OtherVNI->def < VNI->def) 1408 Other.computeAssignment(OtherVNI->id, *this); 1409 else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) { 1410 // This is an early-clobber def overlapping a live-in value in the other 1411 // register. Not mergeable. 1412 V.OtherVNI = OtherLRQ.valueIn(); 1413 return CR_Impossible; 1414 } 1415 V.OtherVNI = OtherVNI; 1416 Val &OtherV = Other.Vals[OtherVNI->id]; 1417 // Keep this value, check for conflicts when analyzing OtherVNI. 1418 if (!OtherV.isAnalyzed()) 1419 return CR_Keep; 1420 // Both sides have been analyzed now. 1421 // Allow overlapping PHI values. Any real interference would show up in a 1422 // predecessor, the PHI itself can't introduce any conflicts. 1423 if (VNI->isPHIDef()) 1424 return CR_Merge; 1425 if (V.ValidLanes & OtherV.ValidLanes) 1426 // Overlapping lanes can't be resolved. 1427 return CR_Impossible; 1428 else 1429 return CR_Merge; 1430 } 1431 1432 // No simultaneous def. Is Other live at the def? 1433 V.OtherVNI = OtherLRQ.valueIn(); 1434 if (!V.OtherVNI) 1435 // No overlap, no conflict. 1436 return CR_Keep; 1437 1438 assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ"); 1439 1440 // We have overlapping values, or possibly a kill of Other. 1441 // Recursively compute assignments up the dominator tree. 1442 Other.computeAssignment(V.OtherVNI->id, *this); 1443 const Val &OtherV = Other.Vals[V.OtherVNI->id]; 1444 1445 // Allow overlapping PHI values. Any real interference would show up in a 1446 // predecessor, the PHI itself can't introduce any conflicts. 1447 if (VNI->isPHIDef()) 1448 return CR_Replace; 1449 1450 // Check for simple erasable conflicts. 1451 if (DefMI->isImplicitDef()) 1452 return CR_Erase; 1453 1454 // Include the non-conflict where DefMI is a coalescable copy that kills 1455 // OtherVNI. We still want the copy erased and value numbers merged. 1456 if (CP.isCoalescable(DefMI)) { 1457 // Some of the lanes copied from OtherVNI may be undef, making them undef 1458 // here too. 1459 V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes; 1460 return CR_Erase; 1461 } 1462 1463 // This may not be a real conflict if DefMI simply kills Other and defines 1464 // VNI. 1465 if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def) 1466 return CR_Keep; 1467 1468 // Handle the case where VNI and OtherVNI can be proven to be identical: 1469 // 1470 // %other = COPY %ext 1471 // %this = COPY %ext <-- Erase this copy 1472 // 1473 if (DefMI->isFullCopy() && !CP.isPartial() && 1474 stripCopies(VNI) == stripCopies(V.OtherVNI)) 1475 return CR_Erase; 1476 1477 // If the lanes written by this instruction were all undef in OtherVNI, it is 1478 // still safe to join the live ranges. This can't be done with a simple value 1479 // mapping, though - OtherVNI will map to multiple values: 1480 // 1481 // 1 %dst:ssub0 = FOO <-- OtherVNI 1482 // 2 %src = BAR <-- VNI 1483 // 3 %dst:ssub1 = COPY %src<kill> <-- Eliminate this copy. 1484 // 4 BAZ %dst<kill> 1485 // 5 QUUX %src<kill> 1486 // 1487 // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace 1488 // handles this complex value mapping. 1489 if ((V.WriteLanes & OtherV.ValidLanes) == 0) 1490 return CR_Replace; 1491 1492 // If the other live range is killed by DefMI and the live ranges are still 1493 // overlapping, it must be because we're looking at an early clobber def: 1494 // 1495 // %dst<def,early-clobber> = ASM %src<kill> 1496 // 1497 // In this case, it is illegal to merge the two live ranges since the early 1498 // clobber def would clobber %src before it was read. 1499 if (OtherLRQ.isKill()) { 1500 // This case where the def doesn't overlap the kill is handled above. 1501 assert(VNI->def.isEarlyClobber() && 1502 "Only early clobber defs can overlap a kill"); 1503 return CR_Impossible; 1504 } 1505 1506 // VNI is clobbering live lanes in OtherVNI, but there is still the 1507 // possibility that no instructions actually read the clobbered lanes. 1508 // If we're clobbering all the lanes in OtherVNI, at least one must be read. 1509 // Otherwise Other.LI wouldn't be live here. 1510 if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes) == 0) 1511 return CR_Impossible; 1512 1513 // We need to verify that no instructions are reading the clobbered lanes. To 1514 // save compile time, we'll only check that locally. Don't allow the tainted 1515 // value to escape the basic block. 1516 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); 1517 if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB)) 1518 return CR_Impossible; 1519 1520 // There are still some things that could go wrong besides clobbered lanes 1521 // being read, for example OtherVNI may be only partially redefined in MBB, 1522 // and some clobbered lanes could escape the block. Save this analysis for 1523 // resolveConflicts() when all values have been mapped. We need to know 1524 // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute 1525 // that now - the recursive analyzeValue() calls must go upwards in the 1526 // dominator tree. 1527 return CR_Unresolved; 1528 } 1529 1530 /// Compute the value assignment for ValNo in LI. 1531 /// This may be called recursively by analyzeValue(), but never for a ValNo on 1532 /// the stack. 1533 void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) { 1534 Val &V = Vals[ValNo]; 1535 if (V.isAnalyzed()) { 1536 // Recursion should always move up the dominator tree, so ValNo is not 1537 // supposed to reappear before it has been assigned. 1538 assert(Assignments[ValNo] != -1 && "Bad recursion?"); 1539 return; 1540 } 1541 switch ((V.Resolution = analyzeValue(ValNo, Other))) { 1542 case CR_Erase: 1543 case CR_Merge: 1544 // Merge this ValNo into OtherVNI. 1545 assert(V.OtherVNI && "OtherVNI not assigned, can't merge."); 1546 assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion"); 1547 Assignments[ValNo] = Other.Assignments[V.OtherVNI->id]; 1548 DEBUG(dbgs() << "\t\tmerge " << PrintReg(LI.reg) << ':' << ValNo << '@' 1549 << LI.getValNumInfo(ValNo)->def << " into " 1550 << PrintReg(Other.LI.reg) << ':' << V.OtherVNI->id << '@' 1551 << V.OtherVNI->def << " --> @" 1552 << NewVNInfo[Assignments[ValNo]]->def << '\n'); 1553 break; 1554 case CR_Replace: 1555 case CR_Unresolved: 1556 // The other value is going to be pruned if this join is successful. 1557 assert(V.OtherVNI && "OtherVNI not assigned, can't prune"); 1558 Other.Vals[V.OtherVNI->id].Pruned = true; 1559 // Fall through. 1560 default: 1561 // This value number needs to go in the final joined live range. 1562 Assignments[ValNo] = NewVNInfo.size(); 1563 NewVNInfo.push_back(LI.getValNumInfo(ValNo)); 1564 break; 1565 } 1566 } 1567 1568 bool JoinVals::mapValues(JoinVals &Other) { 1569 for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { 1570 computeAssignment(i, Other); 1571 if (Vals[i].Resolution == CR_Impossible) { 1572 DEBUG(dbgs() << "\t\tinterference at " << PrintReg(LI.reg) << ':' << i 1573 << '@' << LI.getValNumInfo(i)->def << '\n'); 1574 return false; 1575 } 1576 } 1577 return true; 1578 } 1579 1580 /// Assuming ValNo is going to clobber some valid lanes in Other.LI, compute 1581 /// the extent of the tainted lanes in the block. 1582 /// 1583 /// Multiple values in Other.LI can be affected since partial redefinitions can 1584 /// preserve previously tainted lanes. 1585 /// 1586 /// 1 %dst = VLOAD <-- Define all lanes in %dst 1587 /// 2 %src = FOO <-- ValNo to be joined with %dst:ssub0 1588 /// 3 %dst:ssub1 = BAR <-- Partial redef doesn't clear taint in ssub0 1589 /// 4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read 1590 /// 1591 /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes) 1592 /// entry to TaintedVals. 1593 /// 1594 /// Returns false if the tainted lanes extend beyond the basic block. 1595 bool JoinVals:: 1596 taintExtent(unsigned ValNo, unsigned TaintedLanes, JoinVals &Other, 1597 SmallVectorImpl<std::pair<SlotIndex, unsigned> > &TaintExtent) { 1598 VNInfo *VNI = LI.getValNumInfo(ValNo); 1599 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); 1600 SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB); 1601 1602 // Scan Other.LI from VNI.def to MBBEnd. 1603 LiveInterval::iterator OtherI = Other.LI.find(VNI->def); 1604 assert(OtherI != Other.LI.end() && "No conflict?"); 1605 do { 1606 // OtherI is pointing to a tainted value. Abort the join if the tainted 1607 // lanes escape the block. 1608 SlotIndex End = OtherI->end; 1609 if (End >= MBBEnd) { 1610 DEBUG(dbgs() << "\t\ttaints global " << PrintReg(Other.LI.reg) << ':' 1611 << OtherI->valno->id << '@' << OtherI->start << '\n'); 1612 return false; 1613 } 1614 DEBUG(dbgs() << "\t\ttaints local " << PrintReg(Other.LI.reg) << ':' 1615 << OtherI->valno->id << '@' << OtherI->start 1616 << " to " << End << '\n'); 1617 // A dead def is not a problem. 1618 if (End.isDead()) 1619 break; 1620 TaintExtent.push_back(std::make_pair(End, TaintedLanes)); 1621 1622 // Check for another def in the MBB. 1623 if (++OtherI == Other.LI.end() || OtherI->start >= MBBEnd) 1624 break; 1625 1626 // Lanes written by the new def are no longer tainted. 1627 const Val &OV = Other.Vals[OtherI->valno->id]; 1628 TaintedLanes &= ~OV.WriteLanes; 1629 if (!OV.RedefVNI) 1630 break; 1631 } while (TaintedLanes); 1632 return true; 1633 } 1634 1635 /// Return true if MI uses any of the given Lanes from Reg. 1636 /// This does not include partial redefinitions of Reg. 1637 bool JoinVals::usesLanes(MachineInstr *MI, unsigned Reg, unsigned SubIdx, 1638 unsigned Lanes) { 1639 if (MI->isDebugValue()) 1640 return false; 1641 for (ConstMIOperands MO(MI); MO.isValid(); ++MO) { 1642 if (!MO->isReg() || MO->isDef() || MO->getReg() != Reg) 1643 continue; 1644 if (!MO->readsReg()) 1645 continue; 1646 if (Lanes & TRI->getSubRegIndexLaneMask( 1647 TRI->composeSubRegIndices(SubIdx, MO->getSubReg()))) 1648 return true; 1649 } 1650 return false; 1651 } 1652 1653 bool JoinVals::resolveConflicts(JoinVals &Other) { 1654 for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { 1655 Val &V = Vals[i]; 1656 assert (V.Resolution != CR_Impossible && "Unresolvable conflict"); 1657 if (V.Resolution != CR_Unresolved) 1658 continue; 1659 DEBUG(dbgs() << "\t\tconflict at " << PrintReg(LI.reg) << ':' << i 1660 << '@' << LI.getValNumInfo(i)->def << '\n'); 1661 ++NumLaneConflicts; 1662 assert(V.OtherVNI && "Inconsistent conflict resolution."); 1663 VNInfo *VNI = LI.getValNumInfo(i); 1664 const Val &OtherV = Other.Vals[V.OtherVNI->id]; 1665 1666 // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the 1667 // join, those lanes will be tainted with a wrong value. Get the extent of 1668 // the tainted lanes. 1669 unsigned TaintedLanes = V.WriteLanes & OtherV.ValidLanes; 1670 SmallVector<std::pair<SlotIndex, unsigned>, 8> TaintExtent; 1671 if (!taintExtent(i, TaintedLanes, Other, TaintExtent)) 1672 // Tainted lanes would extend beyond the basic block. 1673 return false; 1674 1675 assert(!TaintExtent.empty() && "There should be at least one conflict."); 1676 1677 // Now look at the instructions from VNI->def to TaintExtent (inclusive). 1678 MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def); 1679 MachineBasicBlock::iterator MI = MBB->begin(); 1680 if (!VNI->isPHIDef()) { 1681 MI = Indexes->getInstructionFromIndex(VNI->def); 1682 // No need to check the instruction defining VNI for reads. 1683 ++MI; 1684 } 1685 assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) && 1686 "Interference ends on VNI->def. Should have been handled earlier"); 1687 MachineInstr *LastMI = 1688 Indexes->getInstructionFromIndex(TaintExtent.front().first); 1689 assert(LastMI && "Range must end at a proper instruction"); 1690 unsigned TaintNum = 0; 1691 for(;;) { 1692 assert(MI != MBB->end() && "Bad LastMI"); 1693 if (usesLanes(MI, Other.LI.reg, Other.SubIdx, TaintedLanes)) { 1694 DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI); 1695 return false; 1696 } 1697 // LastMI is the last instruction to use the current value. 1698 if (&*MI == LastMI) { 1699 if (++TaintNum == TaintExtent.size()) 1700 break; 1701 LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first); 1702 assert(LastMI && "Range must end at a proper instruction"); 1703 TaintedLanes = TaintExtent[TaintNum].second; 1704 } 1705 ++MI; 1706 } 1707 1708 // The tainted lanes are unused. 1709 V.Resolution = CR_Replace; 1710 ++NumLaneResolves; 1711 } 1712 return true; 1713 } 1714 1715 // Determine if ValNo is a copy of a value number in LI or Other.LI that will 1716 // be pruned: 1717 // 1718 // %dst = COPY %src 1719 // %src = COPY %dst <-- This value to be pruned. 1720 // %dst = COPY %src <-- This value is a copy of a pruned value. 1721 // 1722 bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) { 1723 Val &V = Vals[ValNo]; 1724 if (V.Pruned || V.PrunedComputed) 1725 return V.Pruned; 1726 1727 if (V.Resolution != CR_Erase && V.Resolution != CR_Merge) 1728 return V.Pruned; 1729 1730 // Follow copies up the dominator tree and check if any intermediate value 1731 // has been pruned. 1732 V.PrunedComputed = true; 1733 V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this); 1734 return V.Pruned; 1735 } 1736 1737 void JoinVals::pruneValues(JoinVals &Other, 1738 SmallVectorImpl<SlotIndex> &EndPoints) { 1739 for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { 1740 SlotIndex Def = LI.getValNumInfo(i)->def; 1741 switch (Vals[i].Resolution) { 1742 case CR_Keep: 1743 break; 1744 case CR_Replace: { 1745 // This value takes precedence over the value in Other.LI. 1746 LIS->pruneValue(&Other.LI, Def, &EndPoints); 1747 // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF 1748 // instructions are only inserted to provide a live-out value for PHI 1749 // predecessors, so the instruction should simply go away once its value 1750 // has been replaced. 1751 Val &OtherV = Other.Vals[Vals[i].OtherVNI->id]; 1752 bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep; 1753 if (!Def.isBlock()) { 1754 // Remove <def,read-undef> flags. This def is now a partial redef. 1755 // Also remove <def,dead> flags since the joined live range will 1756 // continue past this instruction. 1757 for (MIOperands MO(Indexes->getInstructionFromIndex(Def)); 1758 MO.isValid(); ++MO) 1759 if (MO->isReg() && MO->isDef() && MO->getReg() == LI.reg) { 1760 MO->setIsUndef(EraseImpDef); 1761 MO->setIsDead(false); 1762 } 1763 // This value will reach instructions below, but we need to make sure 1764 // the live range also reaches the instruction at Def. 1765 if (!EraseImpDef) 1766 EndPoints.push_back(Def); 1767 } 1768 DEBUG(dbgs() << "\t\tpruned " << PrintReg(Other.LI.reg) << " at " << Def 1769 << ": " << Other.LI << '\n'); 1770 break; 1771 } 1772 case CR_Erase: 1773 case CR_Merge: 1774 if (isPrunedValue(i, Other)) { 1775 // This value is ultimately a copy of a pruned value in LI or Other.LI. 1776 // We can no longer trust the value mapping computed by 1777 // computeAssignment(), the value that was originally copied could have 1778 // been replaced. 1779 LIS->pruneValue(&LI, Def, &EndPoints); 1780 DEBUG(dbgs() << "\t\tpruned all of " << PrintReg(LI.reg) << " at " 1781 << Def << ": " << LI << '\n'); 1782 } 1783 break; 1784 case CR_Unresolved: 1785 case CR_Impossible: 1786 llvm_unreachable("Unresolved conflicts"); 1787 } 1788 } 1789 } 1790 1791 void JoinVals::eraseInstrs(SmallPtrSet<MachineInstr*, 8> &ErasedInstrs, 1792 SmallVectorImpl<unsigned> &ShrinkRegs) { 1793 for (unsigned i = 0, e = LI.getNumValNums(); i != e; ++i) { 1794 // Get the def location before markUnused() below invalidates it. 1795 SlotIndex Def = LI.getValNumInfo(i)->def; 1796 switch (Vals[i].Resolution) { 1797 case CR_Keep: 1798 // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any 1799 // longer. The IMPLICIT_DEF instructions are only inserted by 1800 // PHIElimination to guarantee that all PHI predecessors have a value. 1801 if (!Vals[i].IsImplicitDef || !Vals[i].Pruned) 1802 break; 1803 // Remove value number i from LI. Note that this VNInfo is still present 1804 // in NewVNInfo, so it will appear as an unused value number in the final 1805 // joined interval. 1806 LI.getValNumInfo(i)->markUnused(); 1807 LI.removeValNo(LI.getValNumInfo(i)); 1808 DEBUG(dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LI << '\n'); 1809 // FALL THROUGH. 1810 1811 case CR_Erase: { 1812 MachineInstr *MI = Indexes->getInstructionFromIndex(Def); 1813 assert(MI && "No instruction to erase"); 1814 if (MI->isCopy()) { 1815 unsigned Reg = MI->getOperand(1).getReg(); 1816 if (TargetRegisterInfo::isVirtualRegister(Reg) && 1817 Reg != CP.getSrcReg() && Reg != CP.getDstReg()) 1818 ShrinkRegs.push_back(Reg); 1819 } 1820 ErasedInstrs.insert(MI); 1821 DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI); 1822 LIS->RemoveMachineInstrFromMaps(MI); 1823 MI->eraseFromParent(); 1824 break; 1825 } 1826 default: 1827 break; 1828 } 1829 } 1830 } 1831 1832 bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { 1833 SmallVector<VNInfo*, 16> NewVNInfo; 1834 LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); 1835 LiveInterval &LHS = LIS->getInterval(CP.getDstReg()); 1836 JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI); 1837 JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI); 1838 1839 DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS 1840 << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS 1841 << '\n'); 1842 1843 // First compute NewVNInfo and the simple value mappings. 1844 // Detect impossible conflicts early. 1845 if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) 1846 return false; 1847 1848 // Some conflicts can only be resolved after all values have been mapped. 1849 if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals)) 1850 return false; 1851 1852 // All clear, the live ranges can be merged. 1853 1854 // The merging algorithm in LiveInterval::join() can't handle conflicting 1855 // value mappings, so we need to remove any live ranges that overlap a 1856 // CR_Replace resolution. Collect a set of end points that can be used to 1857 // restore the live range after joining. 1858 SmallVector<SlotIndex, 8> EndPoints; 1859 LHSVals.pruneValues(RHSVals, EndPoints); 1860 RHSVals.pruneValues(LHSVals, EndPoints); 1861 1862 // Erase COPY and IMPLICIT_DEF instructions. This may cause some external 1863 // registers to require trimming. 1864 SmallVector<unsigned, 8> ShrinkRegs; 1865 LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); 1866 RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs); 1867 while (!ShrinkRegs.empty()) 1868 LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); 1869 1870 // Join RHS into LHS. 1871 LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo, 1872 MRI); 1873 1874 // Kill flags are going to be wrong if the live ranges were overlapping. 1875 // Eventually, we should simply clear all kill flags when computing live 1876 // ranges. They are reinserted after register allocation. 1877 MRI->clearKillFlags(LHS.reg); 1878 MRI->clearKillFlags(RHS.reg); 1879 1880 if (EndPoints.empty()) 1881 return true; 1882 1883 // Recompute the parts of the live range we had to remove because of 1884 // CR_Replace conflicts. 1885 DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() 1886 << " points: " << LHS << '\n'); 1887 LIS->extendToIndices(&LHS, EndPoints); 1888 return true; 1889 } 1890 1891 /// joinIntervals - Attempt to join these two intervals. On failure, this 1892 /// returns false. 1893 bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { 1894 return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP); 1895 } 1896 1897 namespace { 1898 // DepthMBBCompare - Comparison predicate that sort first based on the loop 1899 // depth of the basic block (the unsigned), and then on the MBB number. 1900 struct DepthMBBCompare { 1901 typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair; 1902 bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const { 1903 // Deeper loops first 1904 if (LHS.first != RHS.first) 1905 return LHS.first > RHS.first; 1906 1907 // Prefer blocks that are more connected in the CFG. This takes care of 1908 // the most difficult copies first while intervals are short. 1909 unsigned cl = LHS.second->pred_size() + LHS.second->succ_size(); 1910 unsigned cr = RHS.second->pred_size() + RHS.second->succ_size(); 1911 if (cl != cr) 1912 return cl > cr; 1913 1914 // As a last resort, sort by block number. 1915 return LHS.second->getNumber() < RHS.second->getNumber(); 1916 } 1917 }; 1918 } 1919 1920 // Try joining WorkList copies starting from index From. 1921 // Null out any successful joins. 1922 bool RegisterCoalescer::copyCoalesceWorkList(unsigned From) { 1923 assert(From <= WorkList.size() && "Out of range"); 1924 bool Progress = false; 1925 for (unsigned i = From, e = WorkList.size(); i != e; ++i) { 1926 if (!WorkList[i]) 1927 continue; 1928 // Skip instruction pointers that have already been erased, for example by 1929 // dead code elimination. 1930 if (ErasedInstrs.erase(WorkList[i])) { 1931 WorkList[i] = 0; 1932 continue; 1933 } 1934 bool Again = false; 1935 bool Success = joinCopy(WorkList[i], Again); 1936 Progress |= Success; 1937 if (Success || !Again) 1938 WorkList[i] = 0; 1939 } 1940 return Progress; 1941 } 1942 1943 void 1944 RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { 1945 DEBUG(dbgs() << MBB->getName() << ":\n"); 1946 1947 // Collect all copy-like instructions in MBB. Don't start coalescing anything 1948 // yet, it might invalidate the iterator. 1949 const unsigned PrevSize = WorkList.size(); 1950 for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); 1951 MII != E; ++MII) 1952 if (MII->isCopyLike()) 1953 WorkList.push_back(MII); 1954 1955 // Try coalescing the collected copies immediately, and remove the nulls. 1956 // This prevents the WorkList from getting too large since most copies are 1957 // joinable on the first attempt. 1958 if (copyCoalesceWorkList(PrevSize)) 1959 WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(), 1960 (MachineInstr*)0), WorkList.end()); 1961 } 1962 1963 void RegisterCoalescer::joinAllIntervals() { 1964 DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); 1965 assert(WorkList.empty() && "Old data still around."); 1966 1967 if (Loops->empty()) { 1968 // If there are no loops in the function, join intervals in function order. 1969 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); 1970 I != E; ++I) 1971 copyCoalesceInMBB(I); 1972 } else { 1973 // Otherwise, join intervals in inner loops before other intervals. 1974 // Unfortunately we can't just iterate over loop hierarchy here because 1975 // there may be more MBB's than BB's. Collect MBB's for sorting. 1976 1977 // Join intervals in the function prolog first. We want to join physical 1978 // registers with virtual registers before the intervals got too long. 1979 std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs; 1980 for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){ 1981 MachineBasicBlock *MBB = I; 1982 MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I)); 1983 } 1984 1985 // Sort by loop depth. 1986 std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); 1987 1988 // Finally, join intervals in loop nest order. 1989 for (unsigned i = 0, e = MBBs.size(); i != e; ++i) 1990 copyCoalesceInMBB(MBBs[i].second); 1991 } 1992 1993 // Joining intervals can allow other intervals to be joined. Iteratively join 1994 // until we make no progress. 1995 while (copyCoalesceWorkList()) 1996 /* empty */ ; 1997 } 1998 1999 void RegisterCoalescer::releaseMemory() { 2000 ErasedInstrs.clear(); 2001 WorkList.clear(); 2002 DeadDefs.clear(); 2003 InflateRegs.clear(); 2004 } 2005 2006 bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { 2007 MF = &fn; 2008 MRI = &fn.getRegInfo(); 2009 TM = &fn.getTarget(); 2010 TRI = TM->getRegisterInfo(); 2011 TII = TM->getInstrInfo(); 2012 LIS = &getAnalysis<LiveIntervals>(); 2013 LDV = &getAnalysis<LiveDebugVariables>(); 2014 AA = &getAnalysis<AliasAnalysis>(); 2015 Loops = &getAnalysis<MachineLoopInfo>(); 2016 2017 DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" 2018 << "********** Function: " << MF->getName() << '\n'); 2019 2020 if (VerifyCoalescing) 2021 MF->verify(this, "Before register coalescing"); 2022 2023 RegClassInfo.runOnMachineFunction(fn); 2024 2025 // Join (coalesce) intervals if requested. 2026 if (EnableJoining) 2027 joinAllIntervals(); 2028 2029 // After deleting a lot of copies, register classes may be less constrained. 2030 // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 -> 2031 // DPR inflation. 2032 array_pod_sort(InflateRegs.begin(), InflateRegs.end()); 2033 InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), 2034 InflateRegs.end()); 2035 DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); 2036 for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { 2037 unsigned Reg = InflateRegs[i]; 2038 if (MRI->reg_nodbg_empty(Reg)) 2039 continue; 2040 if (MRI->recomputeRegClass(Reg, *TM)) { 2041 DEBUG(dbgs() << PrintReg(Reg) << " inflated to " 2042 << MRI->getRegClass(Reg)->getName() << '\n'); 2043 ++NumInflated; 2044 } 2045 } 2046 2047 DEBUG(dump()); 2048 DEBUG(LDV->dump()); 2049 if (VerifyCoalescing) 2050 MF->verify(this, "After register coalescing"); 2051 return true; 2052 } 2053 2054 /// print - Implement the dump method. 2055 void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { 2056 LIS->print(O, m); 2057 } 2058