1 //===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Perform peephole optimizations on the machine code: 11 // 12 // - Optimize Extensions 13 // 14 // Optimization of sign / zero extension instructions. It may be extended to 15 // handle other instructions with similar properties. 16 // 17 // On some targets, some instructions, e.g. X86 sign / zero extension, may 18 // leave the source value in the lower part of the result. This optimization 19 // will replace some uses of the pre-extension value with uses of the 20 // sub-register of the results. 21 // 22 // - Optimize Comparisons 23 // 24 // Optimization of comparison instructions. For instance, in this code: 25 // 26 // sub r1, 1 27 // cmp r1, 0 28 // bz L1 29 // 30 // If the "sub" instruction all ready sets (or could be modified to set) the 31 // same flag that the "cmp" instruction sets and that "bz" uses, then we can 32 // eliminate the "cmp" instruction. 33 // 34 // Another instance, in this code: 35 // 36 // sub r1, r3 | sub r1, imm 37 // cmp r3, r1 or cmp r1, r3 | cmp r1, imm 38 // bge L1 39 // 40 // If the branch instruction can use flag from "sub", then we can replace 41 // "sub" with "subs" and eliminate the "cmp" instruction. 42 // 43 // - Optimize Loads: 44 // 45 // Loads that can be folded into a later instruction. A load is foldable 46 // if it loads to virtual registers and the virtual register defined has 47 // a single use. 48 // 49 // - Optimize Copies and Bitcast (more generally, target specific copies): 50 // 51 // Rewrite copies and bitcasts to avoid cross register bank copies 52 // when possible. 53 // E.g., Consider the following example, where capital and lower 54 // letters denote different register file: 55 // b = copy A <-- cross-bank copy 56 // C = copy b <-- cross-bank copy 57 // => 58 // b = copy A <-- cross-bank copy 59 // C = copy A <-- same-bank copy 60 // 61 // E.g., for bitcast: 62 // b = bitcast A <-- cross-bank copy 63 // C = bitcast b <-- cross-bank copy 64 // => 65 // b = bitcast A <-- cross-bank copy 66 // C = copy A <-- same-bank copy 67 //===----------------------------------------------------------------------===// 68 69 #include "llvm/CodeGen/Passes.h" 70 #include "llvm/ADT/DenseMap.h" 71 #include "llvm/ADT/SmallPtrSet.h" 72 #include "llvm/ADT/SmallSet.h" 73 #include "llvm/ADT/Statistic.h" 74 #include "llvm/CodeGen/MachineDominators.h" 75 #include "llvm/CodeGen/MachineInstrBuilder.h" 76 #include "llvm/CodeGen/MachineRegisterInfo.h" 77 #include "llvm/Support/CommandLine.h" 78 #include "llvm/Support/Debug.h" 79 #include "llvm/Support/raw_ostream.h" 80 #include "llvm/Target/TargetInstrInfo.h" 81 #include "llvm/Target/TargetRegisterInfo.h" 82 #include "llvm/Target/TargetSubtargetInfo.h" 83 #include <utility> 84 using namespace llvm; 85 86 #define DEBUG_TYPE "peephole-opt" 87 88 // Optimize Extensions 89 static cl::opt<bool> 90 Aggressive("aggressive-ext-opt", cl::Hidden, 91 cl::desc("Aggressive extension optimization")); 92 93 static cl::opt<bool> 94 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), 95 cl::desc("Disable the peephole optimizer")); 96 97 static cl::opt<bool> 98 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), 99 cl::desc("Disable advanced copy optimization")); 100 101 STATISTIC(NumReuse, "Number of extension results reused"); 102 STATISTIC(NumCmps, "Number of compares eliminated"); 103 STATISTIC(NumImmFold, "Number of move immediate folded"); 104 STATISTIC(NumLoadFold, "Number of loads folded"); 105 STATISTIC(NumSelects, "Number of selects optimized"); 106 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); 107 STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); 108 109 namespace { 110 class ValueTrackerResult; 111 112 class PeepholeOptimizer : public MachineFunctionPass { 113 const TargetInstrInfo *TII; 114 const TargetRegisterInfo *TRI; 115 MachineRegisterInfo *MRI; 116 MachineDominatorTree *DT; // Machine dominator tree 117 118 public: 119 static char ID; // Pass identification 120 PeepholeOptimizer() : MachineFunctionPass(ID) { 121 initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); 122 } 123 124 bool runOnMachineFunction(MachineFunction &MF) override; 125 126 void getAnalysisUsage(AnalysisUsage &AU) const override { 127 AU.setPreservesCFG(); 128 MachineFunctionPass::getAnalysisUsage(AU); 129 if (Aggressive) { 130 AU.addRequired<MachineDominatorTree>(); 131 AU.addPreserved<MachineDominatorTree>(); 132 } 133 } 134 135 private: 136 bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); 137 bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 138 SmallPtrSetImpl<MachineInstr*> &LocalMIs); 139 bool optimizeSelect(MachineInstr *MI, 140 SmallPtrSetImpl<MachineInstr *> &LocalMIs); 141 bool optimizeCondBranch(MachineInstr *MI); 142 bool optimizeCopyOrBitcast(MachineInstr *MI); 143 bool optimizeCoalescableCopy(MachineInstr *MI); 144 bool optimizeUncoalescableCopy(MachineInstr *MI, 145 SmallPtrSetImpl<MachineInstr *> &LocalMIs); 146 bool findNextSource(unsigned &Reg, unsigned &SubReg); 147 bool isMoveImmediate(MachineInstr *MI, 148 SmallSet<unsigned, 4> &ImmDefRegs, 149 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 150 bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 151 SmallSet<unsigned, 4> &ImmDefRegs, 152 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 153 bool isLoadFoldable(MachineInstr *MI, 154 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); 155 156 /// \brief Check whether \p MI is understood by the register coalescer 157 /// but may require some rewriting. 158 bool isCoalescableCopy(const MachineInstr &MI) { 159 // SubregToRegs are not interesting, because they are already register 160 // coalescer friendly. 161 return MI.isCopy() || (!DisableAdvCopyOpt && 162 (MI.isRegSequence() || MI.isInsertSubreg() || 163 MI.isExtractSubreg())); 164 } 165 166 /// \brief Check whether \p MI is a copy like instruction that is 167 /// not recognized by the register coalescer. 168 bool isUncoalescableCopy(const MachineInstr &MI) { 169 return MI.isBitcast() || 170 (!DisableAdvCopyOpt && 171 (MI.isRegSequenceLike() || MI.isInsertSubregLike() || 172 MI.isExtractSubregLike())); 173 } 174 }; 175 176 /// \brief Helper class to hold a reply for ValueTracker queries. Contains the 177 /// returned sources for a given search and the instructions where the sources 178 /// were tracked from. 179 class ValueTrackerResult { 180 private: 181 /// Track all sources found by one ValueTracker query. 182 SmallVector<TargetInstrInfo::RegSubRegPair, 2> RegSrcs; 183 184 /// Instruction using the sources in 'RegSrcs'. 185 const MachineInstr *Inst; 186 187 public: 188 ValueTrackerResult() : Inst(nullptr) {} 189 ValueTrackerResult(unsigned Reg, unsigned SubReg) : Inst(nullptr) { 190 addSource(Reg, SubReg); 191 } 192 193 bool isValid() const { return getNumSources() > 0; } 194 195 void setInst(const MachineInstr *I) { Inst = I; } 196 const MachineInstr *getInst() const { return Inst; } 197 198 void clear() { 199 RegSrcs.clear(); 200 Inst = nullptr; 201 } 202 203 void addSource(unsigned SrcReg, unsigned SrcSubReg) { 204 RegSrcs.push_back(TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg)); 205 } 206 207 void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) { 208 assert(Idx < getNumSources() && "Reg pair source out of index"); 209 RegSrcs[Idx] = TargetInstrInfo::RegSubRegPair(SrcReg, SrcSubReg); 210 } 211 212 int getNumSources() const { return RegSrcs.size(); } 213 214 unsigned getSrcReg(int Idx) const { 215 assert(Idx < getNumSources() && "Reg source out of index"); 216 return RegSrcs[Idx].Reg; 217 } 218 219 unsigned getSrcSubReg(int Idx) const { 220 assert(Idx < getNumSources() && "SubReg source out of index"); 221 return RegSrcs[Idx].SubReg; 222 } 223 }; 224 225 /// \brief Helper class to track the possible sources of a value defined by 226 /// a (chain of) copy related instructions. 227 /// Given a definition (instruction and definition index), this class 228 /// follows the use-def chain to find successive suitable sources. 229 /// The given source can be used to rewrite the definition into 230 /// def = COPY src. 231 /// 232 /// For instance, let us consider the following snippet: 233 /// v0 = 234 /// v2 = INSERT_SUBREG v1, v0, sub0 235 /// def = COPY v2.sub0 236 /// 237 /// Using a ValueTracker for def = COPY v2.sub0 will give the following 238 /// suitable sources: 239 /// v2.sub0 and v0. 240 /// Then, def can be rewritten into def = COPY v0. 241 class ValueTracker { 242 private: 243 /// The current point into the use-def chain. 244 const MachineInstr *Def; 245 /// The index of the definition in Def. 246 unsigned DefIdx; 247 /// The sub register index of the definition. 248 unsigned DefSubReg; 249 /// The register where the value can be found. 250 unsigned Reg; 251 /// Specifiy whether or not the value tracking looks through 252 /// complex instructions. When this is false, the value tracker 253 /// bails on everything that is not a copy or a bitcast. 254 /// 255 /// Note: This could have been implemented as a specialized version of 256 /// the ValueTracker class but that would have complicated the code of 257 /// the users of this class. 258 bool UseAdvancedTracking; 259 /// MachineRegisterInfo used to perform tracking. 260 const MachineRegisterInfo &MRI; 261 /// Optional TargetInstrInfo used to perform some complex 262 /// tracking. 263 const TargetInstrInfo *TII; 264 265 /// \brief Dispatcher to the right underlying implementation of 266 /// getNextSource. 267 ValueTrackerResult getNextSourceImpl(); 268 /// \brief Specialized version of getNextSource for Copy instructions. 269 ValueTrackerResult getNextSourceFromCopy(); 270 /// \brief Specialized version of getNextSource for Bitcast instructions. 271 ValueTrackerResult getNextSourceFromBitcast(); 272 /// \brief Specialized version of getNextSource for RegSequence 273 /// instructions. 274 ValueTrackerResult getNextSourceFromRegSequence(); 275 /// \brief Specialized version of getNextSource for InsertSubreg 276 /// instructions. 277 ValueTrackerResult getNextSourceFromInsertSubreg(); 278 /// \brief Specialized version of getNextSource for ExtractSubreg 279 /// instructions. 280 ValueTrackerResult getNextSourceFromExtractSubreg(); 281 /// \brief Specialized version of getNextSource for SubregToReg 282 /// instructions. 283 ValueTrackerResult getNextSourceFromSubregToReg(); 284 285 public: 286 /// \brief Create a ValueTracker instance for the value defined by \p Reg. 287 /// \p DefSubReg represents the sub register index the value tracker will 288 /// track. It does not need to match the sub register index used in the 289 /// definition of \p Reg. 290 /// \p UseAdvancedTracking specifies whether or not the value tracker looks 291 /// through complex instructions. By default (false), it handles only copy 292 /// and bitcast instructions. 293 /// If \p Reg is a physical register, a value tracker constructed with 294 /// this constructor will not find any alternative source. 295 /// Indeed, when \p Reg is a physical register that constructor does not 296 /// know which definition of \p Reg it should track. 297 /// Use the next constructor to track a physical register. 298 ValueTracker(unsigned Reg, unsigned DefSubReg, 299 const MachineRegisterInfo &MRI, 300 bool UseAdvancedTracking = false, 301 const TargetInstrInfo *TII = nullptr) 302 : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg), 303 UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { 304 if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { 305 Def = MRI.getVRegDef(Reg); 306 DefIdx = MRI.def_begin(Reg).getOperandNo(); 307 } 308 } 309 310 /// \brief Create a ValueTracker instance for the value defined by 311 /// the pair \p MI, \p DefIdx. 312 /// Unlike the other constructor, the value tracker produced by this one 313 /// may be able to find a new source when the definition is a physical 314 /// register. 315 /// This could be useful to rewrite target specific instructions into 316 /// generic copy instructions. 317 ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, 318 const MachineRegisterInfo &MRI, 319 bool UseAdvancedTracking = false, 320 const TargetInstrInfo *TII = nullptr) 321 : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), 322 UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { 323 assert(DefIdx < Def->getDesc().getNumDefs() && 324 Def->getOperand(DefIdx).isReg() && "Invalid definition"); 325 Reg = Def->getOperand(DefIdx).getReg(); 326 } 327 328 /// \brief Following the use-def chain, get the next available source 329 /// for the tracked value. 330 /// \return A ValueTrackerResult containing a set of registers 331 /// and sub registers with tracked values. A ValueTrackerResult with 332 /// an empty set of registers means no source was found. 333 ValueTrackerResult getNextSource(); 334 335 /// \brief Get the last register where the initial value can be found. 336 /// Initially this is the register of the definition. 337 /// Then, after each successful call to getNextSource, this is the 338 /// register of the last source. 339 unsigned getReg() const { return Reg; } 340 }; 341 } 342 343 char PeepholeOptimizer::ID = 0; 344 char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; 345 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", 346 "Peephole Optimizations", false, false) 347 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 348 INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", 349 "Peephole Optimizations", false, false) 350 351 /// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads 352 /// a single register and writes a single register and it does not modify the 353 /// source, and if the source value is preserved as a sub-register of the 354 /// result, then replace all reachable uses of the source with the subreg of the 355 /// result. 356 /// 357 /// Do not generate an EXTRACT that is used only in a debug use, as this changes 358 /// the code. Since this code does not currently share EXTRACTs, just ignore all 359 /// debug uses. 360 bool PeepholeOptimizer:: 361 optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 362 SmallPtrSetImpl<MachineInstr*> &LocalMIs) { 363 unsigned SrcReg, DstReg, SubIdx; 364 if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) 365 return false; 366 367 if (TargetRegisterInfo::isPhysicalRegister(DstReg) || 368 TargetRegisterInfo::isPhysicalRegister(SrcReg)) 369 return false; 370 371 if (MRI->hasOneNonDBGUse(SrcReg)) 372 // No other uses. 373 return false; 374 375 // Ensure DstReg can get a register class that actually supports 376 // sub-registers. Don't change the class until we commit. 377 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); 378 DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx); 379 if (!DstRC) 380 return false; 381 382 // The ext instr may be operating on a sub-register of SrcReg as well. 383 // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit 384 // register. 385 // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of 386 // SrcReg:SubIdx should be replaced. 387 bool UseSrcSubIdx = 388 TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; 389 390 // The source has other uses. See if we can replace the other uses with use of 391 // the result of the extension. 392 SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; 393 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 394 ReachedBBs.insert(UI.getParent()); 395 396 // Uses that are in the same BB of uses of the result of the instruction. 397 SmallVector<MachineOperand*, 8> Uses; 398 399 // Uses that the result of the instruction can reach. 400 SmallVector<MachineOperand*, 8> ExtendedUses; 401 402 bool ExtendLife = true; 403 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { 404 MachineInstr *UseMI = UseMO.getParent(); 405 if (UseMI == MI) 406 continue; 407 408 if (UseMI->isPHI()) { 409 ExtendLife = false; 410 continue; 411 } 412 413 // Only accept uses of SrcReg:SubIdx. 414 if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) 415 continue; 416 417 // It's an error to translate this: 418 // 419 // %reg1025 = <sext> %reg1024 420 // ... 421 // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 422 // 423 // into this: 424 // 425 // %reg1025 = <sext> %reg1024 426 // ... 427 // %reg1027 = COPY %reg1025:4 428 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 429 // 430 // The problem here is that SUBREG_TO_REG is there to assert that an 431 // implicit zext occurs. It doesn't insert a zext instruction. If we allow 432 // the COPY here, it will give us the value after the <sext>, not the 433 // original value of %reg1024 before <sext>. 434 if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) 435 continue; 436 437 MachineBasicBlock *UseMBB = UseMI->getParent(); 438 if (UseMBB == MBB) { 439 // Local uses that come after the extension. 440 if (!LocalMIs.count(UseMI)) 441 Uses.push_back(&UseMO); 442 } else if (ReachedBBs.count(UseMBB)) { 443 // Non-local uses where the result of the extension is used. Always 444 // replace these unless it's a PHI. 445 Uses.push_back(&UseMO); 446 } else if (Aggressive && DT->dominates(MBB, UseMBB)) { 447 // We may want to extend the live range of the extension result in order 448 // to replace these uses. 449 ExtendedUses.push_back(&UseMO); 450 } else { 451 // Both will be live out of the def MBB anyway. Don't extend live range of 452 // the extension result. 453 ExtendLife = false; 454 break; 455 } 456 } 457 458 if (ExtendLife && !ExtendedUses.empty()) 459 // Extend the liveness of the extension result. 460 Uses.append(ExtendedUses.begin(), ExtendedUses.end()); 461 462 // Now replace all uses. 463 bool Changed = false; 464 if (!Uses.empty()) { 465 SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; 466 467 // Look for PHI uses of the extended result, we don't want to extend the 468 // liveness of a PHI input. It breaks all kinds of assumptions down 469 // stream. A PHI use is expected to be the kill of its source values. 470 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 471 if (UI.isPHI()) 472 PHIBBs.insert(UI.getParent()); 473 474 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 475 for (unsigned i = 0, e = Uses.size(); i != e; ++i) { 476 MachineOperand *UseMO = Uses[i]; 477 MachineInstr *UseMI = UseMO->getParent(); 478 MachineBasicBlock *UseMBB = UseMI->getParent(); 479 if (PHIBBs.count(UseMBB)) 480 continue; 481 482 // About to add uses of DstReg, clear DstReg's kill flags. 483 if (!Changed) { 484 MRI->clearKillFlags(DstReg); 485 MRI->constrainRegClass(DstReg, DstRC); 486 } 487 488 unsigned NewVR = MRI->createVirtualRegister(RC); 489 MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), 490 TII->get(TargetOpcode::COPY), NewVR) 491 .addReg(DstReg, 0, SubIdx); 492 // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. 493 if (UseSrcSubIdx) { 494 Copy->getOperand(0).setSubReg(SubIdx); 495 Copy->getOperand(0).setIsUndef(); 496 } 497 UseMO->setReg(NewVR); 498 ++NumReuse; 499 Changed = true; 500 } 501 } 502 503 return Changed; 504 } 505 506 /// optimizeCmpInstr - If the instruction is a compare and the previous 507 /// instruction it's comparing against all ready sets (or could be modified to 508 /// set) the same flag as the compare, then we can remove the comparison and use 509 /// the flag from the previous instruction. 510 bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, 511 MachineBasicBlock *MBB) { 512 // If this instruction is a comparison against zero and isn't comparing a 513 // physical register, we can try to optimize it. 514 unsigned SrcReg, SrcReg2; 515 int CmpMask, CmpValue; 516 if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || 517 TargetRegisterInfo::isPhysicalRegister(SrcReg) || 518 (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) 519 return false; 520 521 // Attempt to optimize the comparison instruction. 522 if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { 523 ++NumCmps; 524 return true; 525 } 526 527 return false; 528 } 529 530 /// Optimize a select instruction. 531 bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI, 532 SmallPtrSetImpl<MachineInstr *> &LocalMIs) { 533 unsigned TrueOp = 0; 534 unsigned FalseOp = 0; 535 bool Optimizable = false; 536 SmallVector<MachineOperand, 4> Cond; 537 if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) 538 return false; 539 if (!Optimizable) 540 return false; 541 if (!TII->optimizeSelect(MI, LocalMIs)) 542 return false; 543 MI->eraseFromParent(); 544 ++NumSelects; 545 return true; 546 } 547 548 /// \brief Check if a simpler conditional branch can be 549 // generated 550 bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { 551 return TII->optimizeCondBranch(MI); 552 } 553 554 /// \brief Check if the registers defined by the pair (RegisterClass, SubReg) 555 /// share the same register file. 556 static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, 557 const TargetRegisterClass *DefRC, 558 unsigned DefSubReg, 559 const TargetRegisterClass *SrcRC, 560 unsigned SrcSubReg) { 561 // Same register class. 562 if (DefRC == SrcRC) 563 return true; 564 565 // Both operands are sub registers. Check if they share a register class. 566 unsigned SrcIdx, DefIdx; 567 if (SrcSubReg && DefSubReg) 568 return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, 569 SrcIdx, DefIdx) != nullptr; 570 // At most one of the register is a sub register, make it Src to avoid 571 // duplicating the test. 572 if (!SrcSubReg) { 573 std::swap(DefSubReg, SrcSubReg); 574 std::swap(DefRC, SrcRC); 575 } 576 577 // One of the register is a sub register, check if we can get a superclass. 578 if (SrcSubReg) 579 return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; 580 // Plain copy. 581 return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; 582 } 583 584 /// \brief Try to find the next source that share the same register file 585 /// for the value defined by \p Reg and \p SubReg. 586 /// When true is returned, \p Reg and \p SubReg are updated with the 587 /// register number and sub-register index of the new source. 588 /// \return False if no alternative sources are available. True otherwise. 589 bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { 590 // Do not try to find a new source for a physical register. 591 // So far we do not have any motivating example for doing that. 592 // Thus, instead of maintaining untested code, we will revisit that if 593 // that changes at some point. 594 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 595 return false; 596 597 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); 598 unsigned DefSubReg = SubReg; 599 600 unsigned Src; 601 unsigned SrcSubReg; 602 bool ShouldRewrite = false; 603 604 // Follow the chain of copies until we reach the top of the use-def chain 605 // or find a more suitable source. 606 ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); 607 do { 608 ValueTrackerResult Res = ValTracker.getNextSource(); 609 if (!Res.isValid()) 610 break; 611 Src = Res.getSrcReg(0); 612 SrcSubReg = Res.getSrcSubReg(0); 613 614 // Do not extend the live-ranges of physical registers as they add 615 // constraints to the register allocator. 616 // Moreover, if we want to extend the live-range of a physical register, 617 // unlike SSA virtual register, we will have to check that they are not 618 // redefine before the related use. 619 if (TargetRegisterInfo::isPhysicalRegister(Src)) 620 break; 621 622 const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); 623 624 // If this source does not incur a cross register bank copy, use it. 625 ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, 626 SrcSubReg); 627 } while (!ShouldRewrite); 628 629 // If we did not find a more suitable source, there is nothing to optimize. 630 if (!ShouldRewrite || Src == Reg) 631 return false; 632 633 Reg = Src; 634 SubReg = SrcSubReg; 635 return true; 636 } 637 638 namespace { 639 /// \brief Helper class to rewrite the arguments of a copy-like instruction. 640 class CopyRewriter { 641 protected: 642 /// The copy-like instruction. 643 MachineInstr &CopyLike; 644 /// The index of the source being rewritten. 645 unsigned CurrentSrcIdx; 646 647 public: 648 CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {} 649 650 virtual ~CopyRewriter() {} 651 652 /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and 653 /// the related value that it affects (TrackReg, TrackSubReg). 654 /// A source is considered rewritable if its register class and the 655 /// register class of the related TrackReg may not be register 656 /// coalescer friendly. In other words, given a copy-like instruction 657 /// not all the arguments may be returned at rewritable source, since 658 /// some arguments are none to be register coalescer friendly. 659 /// 660 /// Each call of this method moves the current source to the next 661 /// rewritable source. 662 /// For instance, let CopyLike be the instruction to rewrite. 663 /// CopyLike has one definition and one source: 664 /// dst.dstSubIdx = CopyLike src.srcSubIdx. 665 /// 666 /// The first call will give the first rewritable source, i.e., 667 /// the only source this instruction has: 668 /// (SrcReg, SrcSubReg) = (src, srcSubIdx). 669 /// This source defines the whole definition, i.e., 670 /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). 671 /// 672 /// The second and subsequent calls will return false, has there is only one 673 /// rewritable source. 674 /// 675 /// \return True if a rewritable source has been found, false otherwise. 676 /// The output arguments are valid if and only if true is returned. 677 virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 678 unsigned &TrackReg, 679 unsigned &TrackSubReg) { 680 // If CurrentSrcIdx == 1, this means this function has already been 681 // called once. CopyLike has one defintiion and one argument, thus, 682 // there is nothing else to rewrite. 683 if (!CopyLike.isCopy() || CurrentSrcIdx == 1) 684 return false; 685 // This is the first call to getNextRewritableSource. 686 // Move the CurrentSrcIdx to remember that we made that call. 687 CurrentSrcIdx = 1; 688 // The rewritable source is the argument. 689 const MachineOperand &MOSrc = CopyLike.getOperand(1); 690 SrcReg = MOSrc.getReg(); 691 SrcSubReg = MOSrc.getSubReg(); 692 // What we track are the alternative sources of the definition. 693 const MachineOperand &MODef = CopyLike.getOperand(0); 694 TrackReg = MODef.getReg(); 695 TrackSubReg = MODef.getSubReg(); 696 return true; 697 } 698 699 /// \brief Rewrite the current source with \p NewReg and \p NewSubReg 700 /// if possible. 701 /// \return True if the rewriting was possible, false otherwise. 702 virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { 703 if (!CopyLike.isCopy() || CurrentSrcIdx != 1) 704 return false; 705 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); 706 MOSrc.setReg(NewReg); 707 MOSrc.setSubReg(NewSubReg); 708 return true; 709 } 710 711 /// \brief Rewrite the current source with \p NewSrcReg and \p NewSecSubReg 712 /// by creating a new COPY instruction. \p DefReg and \p DefSubReg contain the 713 /// definition to be rewritten from the original copylike instruction. 714 /// \return The new COPY if the rewriting was possible, nullptr otherwise. 715 /// This is needed to handle Uncoalescable copies, since they are copy 716 /// like instructions that aren't recognized by the register allocator. 717 virtual MachineInstr *RewriteCurrentSource(unsigned DefReg, 718 unsigned DefSubReg, 719 unsigned NewSrcReg, 720 unsigned NewSrcSubReg) { 721 return nullptr; 722 } 723 }; 724 725 /// \brief Helper class to rewrite uncoalescable copy like instructions 726 /// into new COPY (coalescable friendly) instructions. 727 class UncoalescableRewriter : public CopyRewriter { 728 protected: 729 const TargetInstrInfo &TII; 730 MachineRegisterInfo &MRI; 731 /// The number of defs in the bitcast 732 unsigned NumDefs; 733 734 public: 735 UncoalescableRewriter(MachineInstr &MI, const TargetInstrInfo &TII, 736 MachineRegisterInfo &MRI) 737 : CopyRewriter(MI), TII(TII), MRI(MRI) { 738 NumDefs = MI.getDesc().getNumDefs(); 739 } 740 741 /// \brief Get the next rewritable def source (TrackReg, TrackSubReg) 742 /// All such sources need to be considered rewritable in order to 743 /// rewrite a uncoalescable copy-like instruction. This method return 744 /// each definition that must be checked if rewritable. 745 /// 746 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 747 unsigned &TrackReg, 748 unsigned &TrackSubReg) override { 749 // Find the next non-dead definition and continue from there. 750 if (CurrentSrcIdx == NumDefs) 751 return false; 752 753 while (CopyLike.getOperand(CurrentSrcIdx).isDead()) { 754 ++CurrentSrcIdx; 755 if (CurrentSrcIdx == NumDefs) 756 return false; 757 } 758 759 // What we track are the alternative sources of the definition. 760 const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx); 761 TrackReg = MODef.getReg(); 762 TrackSubReg = MODef.getSubReg(); 763 764 CurrentSrcIdx++; 765 return true; 766 } 767 768 /// \brief Rewrite the current source with \p NewSrcReg and \p NewSrcSubReg 769 /// by creating a new COPY instruction. \p DefReg and \p DefSubReg contain the 770 /// definition to be rewritten from the original copylike instruction. 771 /// \return The new COPY if the rewriting was possible, nullptr otherwise. 772 MachineInstr *RewriteCurrentSource(unsigned DefReg, unsigned DefSubReg, 773 unsigned NewSrcReg, 774 unsigned NewSrcSubReg) override { 775 assert(!TargetRegisterInfo::isPhysicalRegister(DefReg) && 776 "We do not rewrite physical registers"); 777 778 const TargetRegisterClass *DefRC = MRI.getRegClass(DefReg); 779 unsigned NewVR = MRI.createVirtualRegister(DefRC); 780 781 MachineInstr *NewCopy = 782 BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(), 783 TII.get(TargetOpcode::COPY), NewVR) 784 .addReg(NewSrcReg, 0, NewSrcSubReg); 785 786 NewCopy->getOperand(0).setSubReg(DefSubReg); 787 if (DefSubReg) 788 NewCopy->getOperand(0).setIsUndef(); 789 790 MRI.replaceRegWith(DefReg, NewVR); 791 MRI.clearKillFlags(NewVR); 792 793 return NewCopy; 794 } 795 }; 796 797 /// \brief Specialized rewriter for INSERT_SUBREG instruction. 798 class InsertSubregRewriter : public CopyRewriter { 799 public: 800 InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) { 801 assert(MI.isInsertSubreg() && "Invalid instruction"); 802 } 803 804 /// \brief See CopyRewriter::getNextRewritableSource. 805 /// Here CopyLike has the following form: 806 /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. 807 /// Src1 has the same register class has dst, hence, there is 808 /// nothing to rewrite. 809 /// Src2.src2SubIdx, may not be register coalescer friendly. 810 /// Therefore, the first call to this method returns: 811 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 812 /// (TrackReg, TrackSubReg) = (dst, subIdx). 813 /// 814 /// Subsequence calls will return false. 815 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 816 unsigned &TrackReg, 817 unsigned &TrackSubReg) override { 818 // If we already get the only source we can rewrite, return false. 819 if (CurrentSrcIdx == 2) 820 return false; 821 // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. 822 CurrentSrcIdx = 2; 823 const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); 824 SrcReg = MOInsertedReg.getReg(); 825 SrcSubReg = MOInsertedReg.getSubReg(); 826 const MachineOperand &MODef = CopyLike.getOperand(0); 827 828 // We want to track something that is compatible with the 829 // partial definition. 830 TrackReg = MODef.getReg(); 831 if (MODef.getSubReg()) 832 // Bails if we have to compose sub-register indices. 833 return false; 834 TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); 835 return true; 836 } 837 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 838 if (CurrentSrcIdx != 2) 839 return false; 840 // We are rewriting the inserted reg. 841 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 842 MO.setReg(NewReg); 843 MO.setSubReg(NewSubReg); 844 return true; 845 } 846 }; 847 848 /// \brief Specialized rewriter for EXTRACT_SUBREG instruction. 849 class ExtractSubregRewriter : public CopyRewriter { 850 const TargetInstrInfo &TII; 851 852 public: 853 ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) 854 : CopyRewriter(MI), TII(TII) { 855 assert(MI.isExtractSubreg() && "Invalid instruction"); 856 } 857 858 /// \brief See CopyRewriter::getNextRewritableSource. 859 /// Here CopyLike has the following form: 860 /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. 861 /// There is only one rewritable source: Src.subIdx, 862 /// which defines dst.dstSubIdx. 863 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 864 unsigned &TrackReg, 865 unsigned &TrackSubReg) override { 866 // If we already get the only source we can rewrite, return false. 867 if (CurrentSrcIdx == 1) 868 return false; 869 // We are looking at v1 = EXTRACT_SUBREG v0, sub0. 870 CurrentSrcIdx = 1; 871 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); 872 SrcReg = MOExtractedReg.getReg(); 873 // If we have to compose sub-register indices, bails out. 874 if (MOExtractedReg.getSubReg()) 875 return false; 876 877 SrcSubReg = CopyLike.getOperand(2).getImm(); 878 879 // We want to track something that is compatible with the definition. 880 const MachineOperand &MODef = CopyLike.getOperand(0); 881 TrackReg = MODef.getReg(); 882 TrackSubReg = MODef.getSubReg(); 883 return true; 884 } 885 886 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 887 // The only source we can rewrite is the input register. 888 if (CurrentSrcIdx != 1) 889 return false; 890 891 CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); 892 893 // If we find a source that does not require to extract something, 894 // rewrite the operation with a copy. 895 if (!NewSubReg) { 896 // Move the current index to an invalid position. 897 // We do not want another call to this method to be able 898 // to do any change. 899 CurrentSrcIdx = -1; 900 // Rewrite the operation as a COPY. 901 // Get rid of the sub-register index. 902 CopyLike.RemoveOperand(2); 903 // Morph the operation into a COPY. 904 CopyLike.setDesc(TII.get(TargetOpcode::COPY)); 905 return true; 906 } 907 CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); 908 return true; 909 } 910 }; 911 912 /// \brief Specialized rewriter for REG_SEQUENCE instruction. 913 class RegSequenceRewriter : public CopyRewriter { 914 public: 915 RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) { 916 assert(MI.isRegSequence() && "Invalid instruction"); 917 } 918 919 /// \brief See CopyRewriter::getNextRewritableSource. 920 /// Here CopyLike has the following form: 921 /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. 922 /// Each call will return a different source, walking all the available 923 /// source. 924 /// 925 /// The first call returns: 926 /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). 927 /// (TrackReg, TrackSubReg) = (dst, subIdx1). 928 /// 929 /// The second call returns: 930 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 931 /// (TrackReg, TrackSubReg) = (dst, subIdx2). 932 /// 933 /// And so on, until all the sources have been traversed, then 934 /// it returns false. 935 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 936 unsigned &TrackReg, 937 unsigned &TrackSubReg) override { 938 // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. 939 940 // If this is the first call, move to the first argument. 941 if (CurrentSrcIdx == 0) { 942 CurrentSrcIdx = 1; 943 } else { 944 // Otherwise, move to the next argument and check that it is valid. 945 CurrentSrcIdx += 2; 946 if (CurrentSrcIdx >= CopyLike.getNumOperands()) 947 return false; 948 } 949 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); 950 SrcReg = MOInsertedReg.getReg(); 951 // If we have to compose sub-register indices, bails out. 952 if ((SrcSubReg = MOInsertedReg.getSubReg())) 953 return false; 954 955 // We want to track something that is compatible with the related 956 // partial definition. 957 TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); 958 959 const MachineOperand &MODef = CopyLike.getOperand(0); 960 TrackReg = MODef.getReg(); 961 // If we have to compose sub-registers, bails. 962 return MODef.getSubReg() == 0; 963 } 964 965 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 966 // We cannot rewrite out of bound operands. 967 // Moreover, rewritable sources are at odd positions. 968 if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) 969 return false; 970 971 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 972 MO.setReg(NewReg); 973 MO.setSubReg(NewSubReg); 974 return true; 975 } 976 }; 977 } // End namespace. 978 979 /// \brief Get the appropriated CopyRewriter for \p MI. 980 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr 981 /// if no rewriter works for \p MI. 982 static CopyRewriter *getCopyRewriter(MachineInstr &MI, 983 const TargetInstrInfo &TII, 984 MachineRegisterInfo &MRI) { 985 // Handle uncoalescable copy-like instructions. 986 if (MI.isBitcast() || (MI.isRegSequenceLike() || MI.isInsertSubregLike() || 987 MI.isExtractSubregLike())) 988 return new UncoalescableRewriter(MI, TII, MRI); 989 990 switch (MI.getOpcode()) { 991 default: 992 return nullptr; 993 case TargetOpcode::COPY: 994 return new CopyRewriter(MI); 995 case TargetOpcode::INSERT_SUBREG: 996 return new InsertSubregRewriter(MI); 997 case TargetOpcode::EXTRACT_SUBREG: 998 return new ExtractSubregRewriter(MI, TII); 999 case TargetOpcode::REG_SEQUENCE: 1000 return new RegSequenceRewriter(MI); 1001 } 1002 llvm_unreachable(nullptr); 1003 } 1004 1005 /// \brief Optimize generic copy instructions to avoid cross 1006 /// register bank copy. The optimization looks through a chain of 1007 /// copies and tries to find a source that has a compatible register 1008 /// class. 1009 /// Two register classes are considered to be compatible if they share 1010 /// the same register bank. 1011 /// New copies issued by this optimization are register allocator 1012 /// friendly. This optimization does not remove any copy as it may 1013 /// overconstraint the register allocator, but replaces some operands 1014 /// when possible. 1015 /// \pre isCoalescableCopy(*MI) is true. 1016 /// \return True, when \p MI has been rewritten. False otherwise. 1017 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { 1018 assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); 1019 assert(MI->getDesc().getNumDefs() == 1 && 1020 "Coalescer can understand multiple defs?!"); 1021 const MachineOperand &MODef = MI->getOperand(0); 1022 // Do not rewrite physical definitions. 1023 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 1024 return false; 1025 1026 bool Changed = false; 1027 // Get the right rewriter for the current copy. 1028 std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); 1029 // If none exists, bails out. 1030 if (!CpyRewriter) 1031 return false; 1032 // Rewrite each rewritable source. 1033 unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; 1034 while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, 1035 TrackSubReg)) { 1036 unsigned NewSrc = TrackReg; 1037 unsigned NewSubReg = TrackSubReg; 1038 // Try to find a more suitable source. If we failed to do so, or get the 1039 // actual source, move to the next source. 1040 if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) 1041 continue; 1042 // Rewrite source. 1043 if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { 1044 // We may have extended the live-range of NewSrc, account for that. 1045 MRI->clearKillFlags(NewSrc); 1046 Changed = true; 1047 } 1048 } 1049 // TODO: We could have a clean-up method to tidy the instruction. 1050 // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 1051 // => v0 = COPY v1 1052 // Currently we haven't seen motivating example for that and we 1053 // want to avoid untested code. 1054 NumRewrittenCopies += Changed; 1055 return Changed; 1056 } 1057 1058 /// \brief Optimize copy-like instructions to create 1059 /// register coalescer friendly instruction. 1060 /// The optimization tries to kill-off the \p MI by looking 1061 /// through a chain of copies to find a source that has a compatible 1062 /// register class. 1063 /// If such a source is found, it replace \p MI by a generic COPY 1064 /// operation. 1065 /// \pre isUncoalescableCopy(*MI) is true. 1066 /// \return True, when \p MI has been optimized. In that case, \p MI has 1067 /// been removed from its parent. 1068 /// All COPY instructions created, are inserted in \p LocalMIs. 1069 bool PeepholeOptimizer::optimizeUncoalescableCopy( 1070 MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { 1071 assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); 1072 1073 // Check if we can rewrite all the values defined by this instruction. 1074 SmallVector< 1075 std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>, 1076 4> RewritePairs; 1077 // Get the right rewriter for the current copy. 1078 std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII, *MRI)); 1079 // If none exists, bails out. 1080 if (!CpyRewriter) 1081 return false; 1082 1083 // Rewrite each rewritable source by generating new COPYs. This works 1084 // differently from optimizeCoalescableCopy since it first makes sure that all 1085 // definitions can be rewritten. 1086 unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; 1087 while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, 1088 TrackSubReg)) { 1089 // If a physical register is here, this is probably for a good reason. 1090 // Do not rewrite that. 1091 if (TargetRegisterInfo::isPhysicalRegister(TrackReg)) 1092 return false; 1093 1094 // If we do not know how to rewrite this definition, there is no point 1095 // in trying to kill this instruction. 1096 TargetInstrInfo::RegSubRegPair Def(TrackReg, TrackSubReg); 1097 TargetInstrInfo::RegSubRegPair Src = Def; 1098 if (!findNextSource(Src.Reg, Src.SubReg)) 1099 return false; 1100 1101 RewritePairs.push_back(std::make_pair(Def, Src)); 1102 } 1103 1104 // The change is possible for all defs, do it. 1105 for (const auto &PairDefSrc : RewritePairs) { 1106 const auto &Def = PairDefSrc.first; 1107 const auto &Src = PairDefSrc.second; 1108 1109 // Rewrite the "copy" in a way the register coalescer understands. 1110 MachineInstr *NewCopy = CpyRewriter->RewriteCurrentSource( 1111 Def.Reg, Def.SubReg, Src.Reg, Src.SubReg); 1112 assert(NewCopy && "Should be able to always generate a new copy"); 1113 1114 // We extended the lifetime of Src and clear the kill flags to 1115 // account for that. 1116 MRI->clearKillFlags(Src.Reg); 1117 LocalMIs.insert(NewCopy); 1118 } 1119 // MI is now dead. 1120 MI->eraseFromParent(); 1121 ++NumUncoalescableCopies; 1122 return true; 1123 } 1124 1125 /// isLoadFoldable - Check whether MI is a candidate for folding into a later 1126 /// instruction. We only fold loads to virtual registers and the virtual 1127 /// register defined has a single use. 1128 bool PeepholeOptimizer::isLoadFoldable( 1129 MachineInstr *MI, 1130 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { 1131 if (!MI->canFoldAsLoad() || !MI->mayLoad()) 1132 return false; 1133 const MCInstrDesc &MCID = MI->getDesc(); 1134 if (MCID.getNumDefs() != 1) 1135 return false; 1136 1137 unsigned Reg = MI->getOperand(0).getReg(); 1138 // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting 1139 // loads. It should be checked when processing uses of the load, since 1140 // uses can be removed during peephole. 1141 if (!MI->getOperand(0).getSubReg() && 1142 TargetRegisterInfo::isVirtualRegister(Reg) && 1143 MRI->hasOneNonDBGUse(Reg)) { 1144 FoldAsLoadDefCandidates.insert(Reg); 1145 return true; 1146 } 1147 return false; 1148 } 1149 1150 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, 1151 SmallSet<unsigned, 4> &ImmDefRegs, 1152 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 1153 const MCInstrDesc &MCID = MI->getDesc(); 1154 if (!MI->isMoveImmediate()) 1155 return false; 1156 if (MCID.getNumDefs() != 1) 1157 return false; 1158 unsigned Reg = MI->getOperand(0).getReg(); 1159 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 1160 ImmDefMIs.insert(std::make_pair(Reg, MI)); 1161 ImmDefRegs.insert(Reg); 1162 return true; 1163 } 1164 1165 return false; 1166 } 1167 1168 /// foldImmediate - Try folding register operands that are defined by move 1169 /// immediate instructions, i.e. a trivial constant folding optimization, if 1170 /// and only if the def and use are in the same BB. 1171 bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 1172 SmallSet<unsigned, 4> &ImmDefRegs, 1173 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 1174 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 1175 MachineOperand &MO = MI->getOperand(i); 1176 if (!MO.isReg() || MO.isDef()) 1177 continue; 1178 unsigned Reg = MO.getReg(); 1179 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1180 continue; 1181 if (ImmDefRegs.count(Reg) == 0) 1182 continue; 1183 DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); 1184 assert(II != ImmDefMIs.end()); 1185 if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { 1186 ++NumImmFold; 1187 return true; 1188 } 1189 } 1190 return false; 1191 } 1192 1193 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { 1194 if (skipOptnoneFunction(*MF.getFunction())) 1195 return false; 1196 1197 DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); 1198 DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); 1199 1200 if (DisablePeephole) 1201 return false; 1202 1203 TII = MF.getSubtarget().getInstrInfo(); 1204 TRI = MF.getSubtarget().getRegisterInfo(); 1205 MRI = &MF.getRegInfo(); 1206 DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr; 1207 1208 bool Changed = false; 1209 1210 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 1211 MachineBasicBlock *MBB = &*I; 1212 1213 bool SeenMoveImm = false; 1214 1215 // During this forward scan, at some point it needs to answer the question 1216 // "given a pointer to an MI in the current BB, is it located before or 1217 // after the current instruction". 1218 // To perform this, the following set keeps track of the MIs already seen 1219 // during the scan, if a MI is not in the set, it is assumed to be located 1220 // after. Newly created MIs have to be inserted in the set as well. 1221 SmallPtrSet<MachineInstr*, 16> LocalMIs; 1222 SmallSet<unsigned, 4> ImmDefRegs; 1223 DenseMap<unsigned, MachineInstr*> ImmDefMIs; 1224 SmallSet<unsigned, 16> FoldAsLoadDefCandidates; 1225 1226 for (MachineBasicBlock::iterator 1227 MII = I->begin(), MIE = I->end(); MII != MIE; ) { 1228 MachineInstr *MI = &*MII; 1229 // We may be erasing MI below, increment MII now. 1230 ++MII; 1231 LocalMIs.insert(MI); 1232 1233 // Skip debug values. They should not affect this peephole optimization. 1234 if (MI->isDebugValue()) 1235 continue; 1236 1237 // If we run into an instruction we can't fold across, discard 1238 // the load candidates. 1239 if (MI->isLoadFoldBarrier()) 1240 FoldAsLoadDefCandidates.clear(); 1241 1242 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || 1243 MI->isKill() || MI->isInlineAsm() || 1244 MI->hasUnmodeledSideEffects()) 1245 continue; 1246 1247 if ((isUncoalescableCopy(*MI) && 1248 optimizeUncoalescableCopy(MI, LocalMIs)) || 1249 (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || 1250 (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { 1251 // MI is deleted. 1252 LocalMIs.erase(MI); 1253 Changed = true; 1254 continue; 1255 } 1256 1257 if (MI->isConditionalBranch() && optimizeCondBranch(MI)) { 1258 Changed = true; 1259 continue; 1260 } 1261 1262 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { 1263 // MI is just rewritten. 1264 Changed = true; 1265 continue; 1266 } 1267 1268 if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { 1269 SeenMoveImm = true; 1270 } else { 1271 Changed |= optimizeExtInstr(MI, MBB, LocalMIs); 1272 // optimizeExtInstr might have created new instructions after MI 1273 // and before the already incremented MII. Adjust MII so that the 1274 // next iteration sees the new instructions. 1275 MII = MI; 1276 ++MII; 1277 if (SeenMoveImm) 1278 Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); 1279 } 1280 1281 // Check whether MI is a load candidate for folding into a later 1282 // instruction. If MI is not a candidate, check whether we can fold an 1283 // earlier load into MI. 1284 if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && 1285 !FoldAsLoadDefCandidates.empty()) { 1286 const MCInstrDesc &MIDesc = MI->getDesc(); 1287 for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands(); 1288 ++i) { 1289 const MachineOperand &MOp = MI->getOperand(i); 1290 if (!MOp.isReg()) 1291 continue; 1292 unsigned FoldAsLoadDefReg = MOp.getReg(); 1293 if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) { 1294 // We need to fold load after optimizeCmpInstr, since 1295 // optimizeCmpInstr can enable folding by converting SUB to CMP. 1296 // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and 1297 // we need it for markUsesInDebugValueAsUndef(). 1298 unsigned FoldedReg = FoldAsLoadDefReg; 1299 MachineInstr *DefMI = nullptr; 1300 MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, 1301 FoldAsLoadDefReg, 1302 DefMI); 1303 if (FoldMI) { 1304 // Update LocalMIs since we replaced MI with FoldMI and deleted 1305 // DefMI. 1306 DEBUG(dbgs() << "Replacing: " << *MI); 1307 DEBUG(dbgs() << " With: " << *FoldMI); 1308 LocalMIs.erase(MI); 1309 LocalMIs.erase(DefMI); 1310 LocalMIs.insert(FoldMI); 1311 MI->eraseFromParent(); 1312 DefMI->eraseFromParent(); 1313 MRI->markUsesInDebugValueAsUndef(FoldedReg); 1314 FoldAsLoadDefCandidates.erase(FoldedReg); 1315 ++NumLoadFold; 1316 // MI is replaced with FoldMI. 1317 Changed = true; 1318 break; 1319 } 1320 } 1321 } 1322 } 1323 } 1324 } 1325 1326 return Changed; 1327 } 1328 1329 ValueTrackerResult ValueTracker::getNextSourceFromCopy() { 1330 assert(Def->isCopy() && "Invalid definition"); 1331 // Copy instruction are supposed to be: Def = Src. 1332 // If someone breaks this assumption, bad things will happen everywhere. 1333 assert(Def->getNumOperands() == 2 && "Invalid number of operands"); 1334 1335 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 1336 // If we look for a different subreg, it means we want a subreg of src. 1337 // Bails as we do not support composing subreg yet. 1338 return ValueTrackerResult(); 1339 // Otherwise, we want the whole source. 1340 const MachineOperand &Src = Def->getOperand(1); 1341 return ValueTrackerResult(Src.getReg(), Src.getSubReg()); 1342 } 1343 1344 ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { 1345 assert(Def->isBitcast() && "Invalid definition"); 1346 1347 // Bail if there are effects that a plain copy will not expose. 1348 if (Def->hasUnmodeledSideEffects()) 1349 return ValueTrackerResult(); 1350 1351 // Bitcasts with more than one def are not supported. 1352 if (Def->getDesc().getNumDefs() != 1) 1353 return ValueTrackerResult(); 1354 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 1355 // If we look for a different subreg, it means we want a subreg of the src. 1356 // Bails as we do not support composing subreg yet. 1357 return ValueTrackerResult(); 1358 1359 unsigned SrcIdx = Def->getNumOperands(); 1360 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; 1361 ++OpIdx) { 1362 const MachineOperand &MO = Def->getOperand(OpIdx); 1363 if (!MO.isReg() || !MO.getReg()) 1364 continue; 1365 assert(!MO.isDef() && "We should have skipped all the definitions by now"); 1366 if (SrcIdx != EndOpIdx) 1367 // Multiple sources? 1368 return ValueTrackerResult(); 1369 SrcIdx = OpIdx; 1370 } 1371 const MachineOperand &Src = Def->getOperand(SrcIdx); 1372 return ValueTrackerResult(Src.getReg(), Src.getSubReg()); 1373 } 1374 1375 ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() { 1376 assert((Def->isRegSequence() || Def->isRegSequenceLike()) && 1377 "Invalid definition"); 1378 1379 if (Def->getOperand(DefIdx).getSubReg()) 1380 // If we are composing subreg, bails out. 1381 // The case we are checking is Def.<subreg> = REG_SEQUENCE. 1382 // This should almost never happen as the SSA property is tracked at 1383 // the register level (as opposed to the subreg level). 1384 // I.e., 1385 // Def.sub0 = 1386 // Def.sub1 = 1387 // is a valid SSA representation for Def.sub0 and Def.sub1, but not for 1388 // Def. Thus, it must not be generated. 1389 // However, some code could theoretically generates a single 1390 // Def.sub0 (i.e, not defining the other subregs) and we would 1391 // have this case. 1392 // If we can ascertain (or force) that this never happens, we could 1393 // turn that into an assertion. 1394 return ValueTrackerResult(); 1395 1396 if (!TII) 1397 // We could handle the REG_SEQUENCE here, but we do not want to 1398 // duplicate the code from the generic TII. 1399 return ValueTrackerResult(); 1400 1401 SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs; 1402 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) 1403 return ValueTrackerResult(); 1404 1405 // We are looking at: 1406 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... 1407 // Check if one of the operand defines the subreg we are interested in. 1408 for (auto &RegSeqInput : RegSeqInputRegs) { 1409 if (RegSeqInput.SubIdx == DefSubReg) { 1410 if (RegSeqInput.SubReg) 1411 // Bails if we have to compose sub registers. 1412 return ValueTrackerResult(); 1413 1414 return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg); 1415 } 1416 } 1417 1418 // If the subreg we are tracking is super-defined by another subreg, 1419 // we could follow this value. However, this would require to compose 1420 // the subreg and we do not do that for now. 1421 return ValueTrackerResult(); 1422 } 1423 1424 ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() { 1425 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && 1426 "Invalid definition"); 1427 1428 if (Def->getOperand(DefIdx).getSubReg()) 1429 // If we are composing subreg, bails out. 1430 // Same remark as getNextSourceFromRegSequence. 1431 // I.e., this may be turned into an assert. 1432 return ValueTrackerResult(); 1433 1434 if (!TII) 1435 // We could handle the REG_SEQUENCE here, but we do not want to 1436 // duplicate the code from the generic TII. 1437 return ValueTrackerResult(); 1438 1439 TargetInstrInfo::RegSubRegPair BaseReg; 1440 TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; 1441 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) 1442 return ValueTrackerResult(); 1443 1444 // We are looking at: 1445 // Def = INSERT_SUBREG v0, v1, sub1 1446 // There are two cases: 1447 // 1. DefSubReg == sub1, get v1. 1448 // 2. DefSubReg != sub1, the value may be available through v0. 1449 1450 // #1 Check if the inserted register matches the required sub index. 1451 if (InsertedReg.SubIdx == DefSubReg) { 1452 return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg); 1453 } 1454 // #2 Otherwise, if the sub register we are looking for is not partial 1455 // defined by the inserted element, we can look through the main 1456 // register (v0). 1457 const MachineOperand &MODef = Def->getOperand(DefIdx); 1458 // If the result register (Def) and the base register (v0) do not 1459 // have the same register class or if we have to compose 1460 // subregisters, bails out. 1461 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || 1462 BaseReg.SubReg) 1463 return ValueTrackerResult(); 1464 1465 // Get the TRI and check if the inserted sub-register overlaps with the 1466 // sub-register we are tracking. 1467 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); 1468 if (!TRI || 1469 (TRI->getSubRegIndexLaneMask(DefSubReg) & 1470 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) 1471 return ValueTrackerResult(); 1472 // At this point, the value is available in v0 via the same subreg 1473 // we used for Def. 1474 return ValueTrackerResult(BaseReg.Reg, DefSubReg); 1475 } 1476 1477 ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() { 1478 assert((Def->isExtractSubreg() || 1479 Def->isExtractSubregLike()) && "Invalid definition"); 1480 // We are looking at: 1481 // Def = EXTRACT_SUBREG v0, sub0 1482 1483 // Bails if we have to compose sub registers. 1484 // Indeed, if DefSubReg != 0, we would have to compose it with sub0. 1485 if (DefSubReg) 1486 return ValueTrackerResult(); 1487 1488 if (!TII) 1489 // We could handle the EXTRACT_SUBREG here, but we do not want to 1490 // duplicate the code from the generic TII. 1491 return ValueTrackerResult(); 1492 1493 TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; 1494 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) 1495 return ValueTrackerResult(); 1496 1497 // Bails if we have to compose sub registers. 1498 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. 1499 if (ExtractSubregInputReg.SubReg) 1500 return ValueTrackerResult(); 1501 // Otherwise, the value is available in the v0.sub0. 1502 return ValueTrackerResult(ExtractSubregInputReg.Reg, ExtractSubregInputReg.SubIdx); 1503 } 1504 1505 ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() { 1506 assert(Def->isSubregToReg() && "Invalid definition"); 1507 // We are looking at: 1508 // Def = SUBREG_TO_REG Imm, v0, sub0 1509 1510 // Bails if we have to compose sub registers. 1511 // If DefSubReg != sub0, we would have to check that all the bits 1512 // we track are included in sub0 and if yes, we would have to 1513 // determine the right subreg in v0. 1514 if (DefSubReg != Def->getOperand(3).getImm()) 1515 return ValueTrackerResult(); 1516 // Bails if we have to compose sub registers. 1517 // Likewise, if v0.subreg != 0, we would have to compose it with sub0. 1518 if (Def->getOperand(2).getSubReg()) 1519 return ValueTrackerResult(); 1520 1521 return ValueTrackerResult(Def->getOperand(2).getReg(), 1522 Def->getOperand(3).getImm()); 1523 } 1524 1525 ValueTrackerResult ValueTracker::getNextSourceImpl() { 1526 assert(Def && "This method needs a valid definition"); 1527 1528 assert( 1529 (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && 1530 Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); 1531 if (Def->isCopy()) 1532 return getNextSourceFromCopy(); 1533 if (Def->isBitcast()) 1534 return getNextSourceFromBitcast(); 1535 // All the remaining cases involve "complex" instructions. 1536 // Bails if we did not ask for the advanced tracking. 1537 if (!UseAdvancedTracking) 1538 return ValueTrackerResult(); 1539 if (Def->isRegSequence() || Def->isRegSequenceLike()) 1540 return getNextSourceFromRegSequence(); 1541 if (Def->isInsertSubreg() || Def->isInsertSubregLike()) 1542 return getNextSourceFromInsertSubreg(); 1543 if (Def->isExtractSubreg() || Def->isExtractSubregLike()) 1544 return getNextSourceFromExtractSubreg(); 1545 if (Def->isSubregToReg()) 1546 return getNextSourceFromSubregToReg(); 1547 return ValueTrackerResult(); 1548 } 1549 1550 ValueTrackerResult ValueTracker::getNextSource() { 1551 // If we reach a point where we cannot move up in the use-def chain, 1552 // there is nothing we can get. 1553 if (!Def) 1554 return ValueTrackerResult(); 1555 1556 ValueTrackerResult Res = getNextSourceImpl(); 1557 if (Res.isValid()) { 1558 // Update definition, definition index, and subregister for the 1559 // next call of getNextSource. 1560 // Update the current register. 1561 bool OneRegSrc = Res.getNumSources() == 1; 1562 if (OneRegSrc) 1563 Reg = Res.getSrcReg(0); 1564 // Update the result before moving up in the use-def chain 1565 // with the instruction containing the last found sources. 1566 Res.setInst(Def); 1567 1568 // If we can still move up in the use-def chain, move to the next 1569 // definition. 1570 if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) { 1571 Def = MRI.getVRegDef(Reg); 1572 DefIdx = MRI.def_begin(Reg).getOperandNo(); 1573 DefSubReg = Res.getSrcSubReg(0); 1574 return Res; 1575 } 1576 } 1577 // If we end up here, this means we will not be able to find another source 1578 // for the next iteration. Make sure any new call to getNextSource bails out 1579 // early by cutting the use-def chain. 1580 Def = nullptr; 1581 return Res; 1582 } 1583