1 //===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Perform peephole optimizations on the machine code: 11 // 12 // - Optimize Extensions 13 // 14 // Optimization of sign / zero extension instructions. It may be extended to 15 // handle other instructions with similar properties. 16 // 17 // On some targets, some instructions, e.g. X86 sign / zero extension, may 18 // leave the source value in the lower part of the result. This optimization 19 // will replace some uses of the pre-extension value with uses of the 20 // sub-register of the results. 21 // 22 // - Optimize Comparisons 23 // 24 // Optimization of comparison instructions. For instance, in this code: 25 // 26 // sub r1, 1 27 // cmp r1, 0 28 // bz L1 29 // 30 // If the "sub" instruction all ready sets (or could be modified to set) the 31 // same flag that the "cmp" instruction sets and that "bz" uses, then we can 32 // eliminate the "cmp" instruction. 33 // 34 // Another instance, in this code: 35 // 36 // sub r1, r3 | sub r1, imm 37 // cmp r3, r1 or cmp r1, r3 | cmp r1, imm 38 // bge L1 39 // 40 // If the branch instruction can use flag from "sub", then we can replace 41 // "sub" with "subs" and eliminate the "cmp" instruction. 42 // 43 // - Optimize Loads: 44 // 45 // Loads that can be folded into a later instruction. A load is foldable 46 // if it loads to virtual registers and the virtual register defined has 47 // a single use. 48 // 49 // - Optimize Copies and Bitcast (more generally, target specific copies): 50 // 51 // Rewrite copies and bitcasts to avoid cross register bank copies 52 // when possible. 53 // E.g., Consider the following example, where capital and lower 54 // letters denote different register file: 55 // b = copy A <-- cross-bank copy 56 // C = copy b <-- cross-bank copy 57 // => 58 // b = copy A <-- cross-bank copy 59 // C = copy A <-- same-bank copy 60 // 61 // E.g., for bitcast: 62 // b = bitcast A <-- cross-bank copy 63 // C = bitcast b <-- cross-bank copy 64 // => 65 // b = bitcast A <-- cross-bank copy 66 // C = copy A <-- same-bank copy 67 //===----------------------------------------------------------------------===// 68 69 #include "llvm/CodeGen/Passes.h" 70 #include "llvm/ADT/DenseMap.h" 71 #include "llvm/ADT/SmallPtrSet.h" 72 #include "llvm/ADT/SmallSet.h" 73 #include "llvm/ADT/Statistic.h" 74 #include "llvm/CodeGen/MachineDominators.h" 75 #include "llvm/CodeGen/MachineInstrBuilder.h" 76 #include "llvm/CodeGen/MachineRegisterInfo.h" 77 #include "llvm/Support/CommandLine.h" 78 #include "llvm/Support/Debug.h" 79 #include "llvm/Target/TargetInstrInfo.h" 80 #include "llvm/Target/TargetRegisterInfo.h" 81 #include "llvm/Target/TargetSubtargetInfo.h" 82 #include <utility> 83 using namespace llvm; 84 85 #define DEBUG_TYPE "peephole-opt" 86 87 // Optimize Extensions 88 static cl::opt<bool> 89 Aggressive("aggressive-ext-opt", cl::Hidden, 90 cl::desc("Aggressive extension optimization")); 91 92 static cl::opt<bool> 93 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), 94 cl::desc("Disable the peephole optimizer")); 95 96 static cl::opt<bool> 97 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), 98 cl::desc("Disable advanced copy optimization")); 99 100 STATISTIC(NumReuse, "Number of extension results reused"); 101 STATISTIC(NumCmps, "Number of compares eliminated"); 102 STATISTIC(NumImmFold, "Number of move immediate folded"); 103 STATISTIC(NumLoadFold, "Number of loads folded"); 104 STATISTIC(NumSelects, "Number of selects optimized"); 105 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); 106 STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); 107 108 namespace { 109 class PeepholeOptimizer : public MachineFunctionPass { 110 const TargetMachine *TM; 111 const TargetInstrInfo *TII; 112 MachineRegisterInfo *MRI; 113 MachineDominatorTree *DT; // Machine dominator tree 114 115 public: 116 static char ID; // Pass identification 117 PeepholeOptimizer() : MachineFunctionPass(ID) { 118 initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); 119 } 120 121 bool runOnMachineFunction(MachineFunction &MF) override; 122 123 void getAnalysisUsage(AnalysisUsage &AU) const override { 124 AU.setPreservesCFG(); 125 MachineFunctionPass::getAnalysisUsage(AU); 126 if (Aggressive) { 127 AU.addRequired<MachineDominatorTree>(); 128 AU.addPreserved<MachineDominatorTree>(); 129 } 130 } 131 132 private: 133 bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); 134 bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 135 SmallPtrSetImpl<MachineInstr*> &LocalMIs); 136 bool optimizeSelect(MachineInstr *MI); 137 bool optimizeCopyOrBitcast(MachineInstr *MI); 138 bool optimizeCoalescableCopy(MachineInstr *MI); 139 bool optimizeUncoalescableCopy(MachineInstr *MI, 140 SmallPtrSetImpl<MachineInstr *> &LocalMIs); 141 bool findNextSource(unsigned &Reg, unsigned &SubReg); 142 bool isMoveImmediate(MachineInstr *MI, 143 SmallSet<unsigned, 4> &ImmDefRegs, 144 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 145 bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 146 SmallSet<unsigned, 4> &ImmDefRegs, 147 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 148 bool isLoadFoldable(MachineInstr *MI, 149 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); 150 151 /// \brief Check whether \p MI is understood by the register coalescer 152 /// but may require some rewriting. 153 bool isCoalescableCopy(const MachineInstr &MI) { 154 // SubregToRegs are not interesting, because they are already register 155 // coalescer friendly. 156 return MI.isCopy() || (!DisableAdvCopyOpt && 157 (MI.isRegSequence() || MI.isInsertSubreg() || 158 MI.isExtractSubreg())); 159 } 160 161 /// \brief Check whether \p MI is a copy like instruction that is 162 /// not recognized by the register coalescer. 163 bool isUncoalescableCopy(const MachineInstr &MI) { 164 return MI.isBitcast() || 165 (!DisableAdvCopyOpt && 166 (MI.isRegSequenceLike() || MI.isInsertSubregLike() || 167 MI.isExtractSubregLike())); 168 } 169 }; 170 171 /// \brief Helper class to track the possible sources of a value defined by 172 /// a (chain of) copy related instructions. 173 /// Given a definition (instruction and definition index), this class 174 /// follows the use-def chain to find successive suitable sources. 175 /// The given source can be used to rewrite the definition into 176 /// def = COPY src. 177 /// 178 /// For instance, let us consider the following snippet: 179 /// v0 = 180 /// v2 = INSERT_SUBREG v1, v0, sub0 181 /// def = COPY v2.sub0 182 /// 183 /// Using a ValueTracker for def = COPY v2.sub0 will give the following 184 /// suitable sources: 185 /// v2.sub0 and v0. 186 /// Then, def can be rewritten into def = COPY v0. 187 class ValueTracker { 188 private: 189 /// The current point into the use-def chain. 190 const MachineInstr *Def; 191 /// The index of the definition in Def. 192 unsigned DefIdx; 193 /// The sub register index of the definition. 194 unsigned DefSubReg; 195 /// The register where the value can be found. 196 unsigned Reg; 197 /// Specifiy whether or not the value tracking looks through 198 /// complex instructions. When this is false, the value tracker 199 /// bails on everything that is not a copy or a bitcast. 200 /// 201 /// Note: This could have been implemented as a specialized version of 202 /// the ValueTracker class but that would have complicated the code of 203 /// the users of this class. 204 bool UseAdvancedTracking; 205 /// MachineRegisterInfo used to perform tracking. 206 const MachineRegisterInfo &MRI; 207 /// Optional TargetInstrInfo used to perform some complex 208 /// tracking. 209 const TargetInstrInfo *TII; 210 211 /// \brief Dispatcher to the right underlying implementation of 212 /// getNextSource. 213 bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg); 214 /// \brief Specialized version of getNextSource for Copy instructions. 215 bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg); 216 /// \brief Specialized version of getNextSource for Bitcast instructions. 217 bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg); 218 /// \brief Specialized version of getNextSource for RegSequence 219 /// instructions. 220 bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg); 221 /// \brief Specialized version of getNextSource for InsertSubreg 222 /// instructions. 223 bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg); 224 /// \brief Specialized version of getNextSource for ExtractSubreg 225 /// instructions. 226 bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg); 227 /// \brief Specialized version of getNextSource for SubregToReg 228 /// instructions. 229 bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg); 230 231 public: 232 /// \brief Create a ValueTracker instance for the value defined by \p Reg. 233 /// \p DefSubReg represents the sub register index the value tracker will 234 /// track. It does not need to match the sub register index used in the 235 /// definition of \p Reg. 236 /// \p UseAdvancedTracking specifies whether or not the value tracker looks 237 /// through complex instructions. By default (false), it handles only copy 238 /// and bitcast instructions. 239 /// If \p Reg is a physical register, a value tracker constructed with 240 /// this constructor will not find any alternative source. 241 /// Indeed, when \p Reg is a physical register that constructor does not 242 /// know which definition of \p Reg it should track. 243 /// Use the next constructor to track a physical register. 244 ValueTracker(unsigned Reg, unsigned DefSubReg, 245 const MachineRegisterInfo &MRI, 246 bool UseAdvancedTracking = false, 247 const TargetInstrInfo *TII = nullptr) 248 : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg), 249 UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { 250 if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { 251 Def = MRI.getVRegDef(Reg); 252 DefIdx = MRI.def_begin(Reg).getOperandNo(); 253 } 254 } 255 256 /// \brief Create a ValueTracker instance for the value defined by 257 /// the pair \p MI, \p DefIdx. 258 /// Unlike the other constructor, the value tracker produced by this one 259 /// may be able to find a new source when the definition is a physical 260 /// register. 261 /// This could be useful to rewrite target specific instructions into 262 /// generic copy instructions. 263 ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, 264 const MachineRegisterInfo &MRI, 265 bool UseAdvancedTracking = false, 266 const TargetInstrInfo *TII = nullptr) 267 : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), 268 UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { 269 assert(DefIdx < Def->getDesc().getNumDefs() && 270 Def->getOperand(DefIdx).isReg() && "Invalid definition"); 271 Reg = Def->getOperand(DefIdx).getReg(); 272 } 273 274 /// \brief Following the use-def chain, get the next available source 275 /// for the tracked value. 276 /// When the returned value is not nullptr, \p SrcReg gives the register 277 /// that contain the tracked value. 278 /// \note The sub register index returned in \p SrcSubReg must be used 279 /// on \p SrcReg to access the actual value. 280 /// \return Unless the returned value is nullptr (i.e., no source found), 281 /// \p SrcReg gives the register of the next source used in the returned 282 /// instruction and \p SrcSubReg the sub-register index to be used on that 283 /// source to get the tracked value. When nullptr is returned, no 284 /// alternative source has been found. 285 const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg); 286 287 /// \brief Get the last register where the initial value can be found. 288 /// Initially this is the register of the definition. 289 /// Then, after each successful call to getNextSource, this is the 290 /// register of the last source. 291 unsigned getReg() const { return Reg; } 292 }; 293 } 294 295 char PeepholeOptimizer::ID = 0; 296 char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; 297 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", 298 "Peephole Optimizations", false, false) 299 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 300 INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", 301 "Peephole Optimizations", false, false) 302 303 /// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads 304 /// a single register and writes a single register and it does not modify the 305 /// source, and if the source value is preserved as a sub-register of the 306 /// result, then replace all reachable uses of the source with the subreg of the 307 /// result. 308 /// 309 /// Do not generate an EXTRACT that is used only in a debug use, as this changes 310 /// the code. Since this code does not currently share EXTRACTs, just ignore all 311 /// debug uses. 312 bool PeepholeOptimizer:: 313 optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 314 SmallPtrSetImpl<MachineInstr*> &LocalMIs) { 315 unsigned SrcReg, DstReg, SubIdx; 316 if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) 317 return false; 318 319 if (TargetRegisterInfo::isPhysicalRegister(DstReg) || 320 TargetRegisterInfo::isPhysicalRegister(SrcReg)) 321 return false; 322 323 if (MRI->hasOneNonDBGUse(SrcReg)) 324 // No other uses. 325 return false; 326 327 // Ensure DstReg can get a register class that actually supports 328 // sub-registers. Don't change the class until we commit. 329 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); 330 DstRC = TM->getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg( 331 DstRC, SubIdx); 332 if (!DstRC) 333 return false; 334 335 // The ext instr may be operating on a sub-register of SrcReg as well. 336 // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit 337 // register. 338 // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of 339 // SrcReg:SubIdx should be replaced. 340 bool UseSrcSubIdx = 341 TM->getSubtargetImpl()->getRegisterInfo()->getSubClassWithSubReg( 342 MRI->getRegClass(SrcReg), SubIdx) != nullptr; 343 344 // The source has other uses. See if we can replace the other uses with use of 345 // the result of the extension. 346 SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; 347 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 348 ReachedBBs.insert(UI.getParent()); 349 350 // Uses that are in the same BB of uses of the result of the instruction. 351 SmallVector<MachineOperand*, 8> Uses; 352 353 // Uses that the result of the instruction can reach. 354 SmallVector<MachineOperand*, 8> ExtendedUses; 355 356 bool ExtendLife = true; 357 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { 358 MachineInstr *UseMI = UseMO.getParent(); 359 if (UseMI == MI) 360 continue; 361 362 if (UseMI->isPHI()) { 363 ExtendLife = false; 364 continue; 365 } 366 367 // Only accept uses of SrcReg:SubIdx. 368 if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) 369 continue; 370 371 // It's an error to translate this: 372 // 373 // %reg1025 = <sext> %reg1024 374 // ... 375 // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 376 // 377 // into this: 378 // 379 // %reg1025 = <sext> %reg1024 380 // ... 381 // %reg1027 = COPY %reg1025:4 382 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 383 // 384 // The problem here is that SUBREG_TO_REG is there to assert that an 385 // implicit zext occurs. It doesn't insert a zext instruction. If we allow 386 // the COPY here, it will give us the value after the <sext>, not the 387 // original value of %reg1024 before <sext>. 388 if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) 389 continue; 390 391 MachineBasicBlock *UseMBB = UseMI->getParent(); 392 if (UseMBB == MBB) { 393 // Local uses that come after the extension. 394 if (!LocalMIs.count(UseMI)) 395 Uses.push_back(&UseMO); 396 } else if (ReachedBBs.count(UseMBB)) { 397 // Non-local uses where the result of the extension is used. Always 398 // replace these unless it's a PHI. 399 Uses.push_back(&UseMO); 400 } else if (Aggressive && DT->dominates(MBB, UseMBB)) { 401 // We may want to extend the live range of the extension result in order 402 // to replace these uses. 403 ExtendedUses.push_back(&UseMO); 404 } else { 405 // Both will be live out of the def MBB anyway. Don't extend live range of 406 // the extension result. 407 ExtendLife = false; 408 break; 409 } 410 } 411 412 if (ExtendLife && !ExtendedUses.empty()) 413 // Extend the liveness of the extension result. 414 std::copy(ExtendedUses.begin(), ExtendedUses.end(), 415 std::back_inserter(Uses)); 416 417 // Now replace all uses. 418 bool Changed = false; 419 if (!Uses.empty()) { 420 SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; 421 422 // Look for PHI uses of the extended result, we don't want to extend the 423 // liveness of a PHI input. It breaks all kinds of assumptions down 424 // stream. A PHI use is expected to be the kill of its source values. 425 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 426 if (UI.isPHI()) 427 PHIBBs.insert(UI.getParent()); 428 429 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 430 for (unsigned i = 0, e = Uses.size(); i != e; ++i) { 431 MachineOperand *UseMO = Uses[i]; 432 MachineInstr *UseMI = UseMO->getParent(); 433 MachineBasicBlock *UseMBB = UseMI->getParent(); 434 if (PHIBBs.count(UseMBB)) 435 continue; 436 437 // About to add uses of DstReg, clear DstReg's kill flags. 438 if (!Changed) { 439 MRI->clearKillFlags(DstReg); 440 MRI->constrainRegClass(DstReg, DstRC); 441 } 442 443 unsigned NewVR = MRI->createVirtualRegister(RC); 444 MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), 445 TII->get(TargetOpcode::COPY), NewVR) 446 .addReg(DstReg, 0, SubIdx); 447 // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. 448 if (UseSrcSubIdx) { 449 Copy->getOperand(0).setSubReg(SubIdx); 450 Copy->getOperand(0).setIsUndef(); 451 } 452 UseMO->setReg(NewVR); 453 ++NumReuse; 454 Changed = true; 455 } 456 } 457 458 return Changed; 459 } 460 461 /// optimizeCmpInstr - If the instruction is a compare and the previous 462 /// instruction it's comparing against all ready sets (or could be modified to 463 /// set) the same flag as the compare, then we can remove the comparison and use 464 /// the flag from the previous instruction. 465 bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, 466 MachineBasicBlock *MBB) { 467 // If this instruction is a comparison against zero and isn't comparing a 468 // physical register, we can try to optimize it. 469 unsigned SrcReg, SrcReg2; 470 int CmpMask, CmpValue; 471 if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || 472 TargetRegisterInfo::isPhysicalRegister(SrcReg) || 473 (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) 474 return false; 475 476 // Attempt to optimize the comparison instruction. 477 if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { 478 ++NumCmps; 479 return true; 480 } 481 482 return false; 483 } 484 485 /// Optimize a select instruction. 486 bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { 487 unsigned TrueOp = 0; 488 unsigned FalseOp = 0; 489 bool Optimizable = false; 490 SmallVector<MachineOperand, 4> Cond; 491 if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) 492 return false; 493 if (!Optimizable) 494 return false; 495 if (!TII->optimizeSelect(MI)) 496 return false; 497 MI->eraseFromParent(); 498 ++NumSelects; 499 return true; 500 } 501 502 /// \brief Check if the registers defined by the pair (RegisterClass, SubReg) 503 /// share the same register file. 504 static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, 505 const TargetRegisterClass *DefRC, 506 unsigned DefSubReg, 507 const TargetRegisterClass *SrcRC, 508 unsigned SrcSubReg) { 509 // Same register class. 510 if (DefRC == SrcRC) 511 return true; 512 513 // Both operands are sub registers. Check if they share a register class. 514 unsigned SrcIdx, DefIdx; 515 if (SrcSubReg && DefSubReg) 516 return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, 517 SrcIdx, DefIdx) != nullptr; 518 // At most one of the register is a sub register, make it Src to avoid 519 // duplicating the test. 520 if (!SrcSubReg) { 521 std::swap(DefSubReg, SrcSubReg); 522 std::swap(DefRC, SrcRC); 523 } 524 525 // One of the register is a sub register, check if we can get a superclass. 526 if (SrcSubReg) 527 return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; 528 // Plain copy. 529 return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; 530 } 531 532 /// \brief Try to find the next source that share the same register file 533 /// for the value defined by \p Reg and \p SubReg. 534 /// When true is returned, \p Reg and \p SubReg are updated with the 535 /// register number and sub-register index of the new source. 536 /// \return False if no alternative sources are available. True otherwise. 537 bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { 538 // Do not try to find a new source for a physical register. 539 // So far we do not have any motivating example for doing that. 540 // Thus, instead of maintaining untested code, we will revisit that if 541 // that changes at some point. 542 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 543 return false; 544 545 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); 546 unsigned DefSubReg = SubReg; 547 548 unsigned Src; 549 unsigned SrcSubReg; 550 bool ShouldRewrite = false; 551 const TargetRegisterInfo &TRI = *TM->getSubtargetImpl()->getRegisterInfo(); 552 553 // Follow the chain of copies until we reach the top of the use-def chain 554 // or find a more suitable source. 555 ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); 556 do { 557 unsigned CopySrcReg, CopySrcSubReg; 558 if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg)) 559 break; 560 Src = CopySrcReg; 561 SrcSubReg = CopySrcSubReg; 562 563 // Do not extend the live-ranges of physical registers as they add 564 // constraints to the register allocator. 565 // Moreover, if we want to extend the live-range of a physical register, 566 // unlike SSA virtual register, we will have to check that they are not 567 // redefine before the related use. 568 if (TargetRegisterInfo::isPhysicalRegister(Src)) 569 break; 570 571 const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); 572 573 // If this source does not incur a cross register bank copy, use it. 574 ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC, 575 SrcSubReg); 576 } while (!ShouldRewrite); 577 578 // If we did not find a more suitable source, there is nothing to optimize. 579 if (!ShouldRewrite || Src == Reg) 580 return false; 581 582 Reg = Src; 583 SubReg = SrcSubReg; 584 return true; 585 } 586 587 namespace { 588 /// \brief Helper class to rewrite the arguments of a copy-like instruction. 589 class CopyRewriter { 590 protected: 591 /// The copy-like instruction. 592 MachineInstr &CopyLike; 593 /// The index of the source being rewritten. 594 unsigned CurrentSrcIdx; 595 596 public: 597 CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {} 598 599 virtual ~CopyRewriter() {} 600 601 /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and 602 /// the related value that it affects (TrackReg, TrackSubReg). 603 /// A source is considered rewritable if its register class and the 604 /// register class of the related TrackReg may not be register 605 /// coalescer friendly. In other words, given a copy-like instruction 606 /// not all the arguments may be returned at rewritable source, since 607 /// some arguments are none to be register coalescer friendly. 608 /// 609 /// Each call of this method moves the current source to the next 610 /// rewritable source. 611 /// For instance, let CopyLike be the instruction to rewrite. 612 /// CopyLike has one definition and one source: 613 /// dst.dstSubIdx = CopyLike src.srcSubIdx. 614 /// 615 /// The first call will give the first rewritable source, i.e., 616 /// the only source this instruction has: 617 /// (SrcReg, SrcSubReg) = (src, srcSubIdx). 618 /// This source defines the whole definition, i.e., 619 /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). 620 /// 621 /// The second and subsequent calls will return false, has there is only one 622 /// rewritable source. 623 /// 624 /// \return True if a rewritable source has been found, false otherwise. 625 /// The output arguments are valid if and only if true is returned. 626 virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 627 unsigned &TrackReg, 628 unsigned &TrackSubReg) { 629 // If CurrentSrcIdx == 1, this means this function has already been 630 // called once. CopyLike has one defintiion and one argument, thus, 631 // there is nothing else to rewrite. 632 if (!CopyLike.isCopy() || CurrentSrcIdx == 1) 633 return false; 634 // This is the first call to getNextRewritableSource. 635 // Move the CurrentSrcIdx to remember that we made that call. 636 CurrentSrcIdx = 1; 637 // The rewritable source is the argument. 638 const MachineOperand &MOSrc = CopyLike.getOperand(1); 639 SrcReg = MOSrc.getReg(); 640 SrcSubReg = MOSrc.getSubReg(); 641 // What we track are the alternative sources of the definition. 642 const MachineOperand &MODef = CopyLike.getOperand(0); 643 TrackReg = MODef.getReg(); 644 TrackSubReg = MODef.getSubReg(); 645 return true; 646 } 647 648 /// \brief Rewrite the current source with \p NewReg and \p NewSubReg 649 /// if possible. 650 /// \return True if the rewritting was possible, false otherwise. 651 virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { 652 if (!CopyLike.isCopy() || CurrentSrcIdx != 1) 653 return false; 654 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); 655 MOSrc.setReg(NewReg); 656 MOSrc.setSubReg(NewSubReg); 657 return true; 658 } 659 }; 660 661 /// \brief Specialized rewriter for INSERT_SUBREG instruction. 662 class InsertSubregRewriter : public CopyRewriter { 663 public: 664 InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) { 665 assert(MI.isInsertSubreg() && "Invalid instruction"); 666 } 667 668 /// \brief See CopyRewriter::getNextRewritableSource. 669 /// Here CopyLike has the following form: 670 /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. 671 /// Src1 has the same register class has dst, hence, there is 672 /// nothing to rewrite. 673 /// Src2.src2SubIdx, may not be register coalescer friendly. 674 /// Therefore, the first call to this method returns: 675 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 676 /// (TrackReg, TrackSubReg) = (dst, subIdx). 677 /// 678 /// Subsequence calls will return false. 679 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 680 unsigned &TrackReg, 681 unsigned &TrackSubReg) override { 682 // If we already get the only source we can rewrite, return false. 683 if (CurrentSrcIdx == 2) 684 return false; 685 // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. 686 CurrentSrcIdx = 2; 687 const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); 688 SrcReg = MOInsertedReg.getReg(); 689 SrcSubReg = MOInsertedReg.getSubReg(); 690 const MachineOperand &MODef = CopyLike.getOperand(0); 691 692 // We want to track something that is compatible with the 693 // partial definition. 694 TrackReg = MODef.getReg(); 695 if (MODef.getSubReg()) 696 // Bails if we have to compose sub-register indices. 697 return false; 698 TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); 699 return true; 700 } 701 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 702 if (CurrentSrcIdx != 2) 703 return false; 704 // We are rewriting the inserted reg. 705 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 706 MO.setReg(NewReg); 707 MO.setSubReg(NewSubReg); 708 return true; 709 } 710 }; 711 712 /// \brief Specialized rewriter for EXTRACT_SUBREG instruction. 713 class ExtractSubregRewriter : public CopyRewriter { 714 const TargetInstrInfo &TII; 715 716 public: 717 ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) 718 : CopyRewriter(MI), TII(TII) { 719 assert(MI.isExtractSubreg() && "Invalid instruction"); 720 } 721 722 /// \brief See CopyRewriter::getNextRewritableSource. 723 /// Here CopyLike has the following form: 724 /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. 725 /// There is only one rewritable source: Src.subIdx, 726 /// which defines dst.dstSubIdx. 727 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 728 unsigned &TrackReg, 729 unsigned &TrackSubReg) override { 730 // If we already get the only source we can rewrite, return false. 731 if (CurrentSrcIdx == 1) 732 return false; 733 // We are looking at v1 = EXTRACT_SUBREG v0, sub0. 734 CurrentSrcIdx = 1; 735 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); 736 SrcReg = MOExtractedReg.getReg(); 737 // If we have to compose sub-register indices, bails out. 738 if (MOExtractedReg.getSubReg()) 739 return false; 740 741 SrcSubReg = CopyLike.getOperand(2).getImm(); 742 743 // We want to track something that is compatible with the definition. 744 const MachineOperand &MODef = CopyLike.getOperand(0); 745 TrackReg = MODef.getReg(); 746 TrackSubReg = MODef.getSubReg(); 747 return true; 748 } 749 750 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 751 // The only source we can rewrite is the input register. 752 if (CurrentSrcIdx != 1) 753 return false; 754 755 CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); 756 757 // If we find a source that does not require to extract something, 758 // rewrite the operation with a copy. 759 if (!NewSubReg) { 760 // Move the current index to an invalid position. 761 // We do not want another call to this method to be able 762 // to do any change. 763 CurrentSrcIdx = -1; 764 // Rewrite the operation as a COPY. 765 // Get rid of the sub-register index. 766 CopyLike.RemoveOperand(2); 767 // Morph the operation into a COPY. 768 CopyLike.setDesc(TII.get(TargetOpcode::COPY)); 769 return true; 770 } 771 CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); 772 return true; 773 } 774 }; 775 776 /// \brief Specialized rewriter for REG_SEQUENCE instruction. 777 class RegSequenceRewriter : public CopyRewriter { 778 public: 779 RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) { 780 assert(MI.isRegSequence() && "Invalid instruction"); 781 } 782 783 /// \brief See CopyRewriter::getNextRewritableSource. 784 /// Here CopyLike has the following form: 785 /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. 786 /// Each call will return a different source, walking all the available 787 /// source. 788 /// 789 /// The first call returns: 790 /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). 791 /// (TrackReg, TrackSubReg) = (dst, subIdx1). 792 /// 793 /// The second call returns: 794 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 795 /// (TrackReg, TrackSubReg) = (dst, subIdx2). 796 /// 797 /// And so on, until all the sources have been traversed, then 798 /// it returns false. 799 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 800 unsigned &TrackReg, 801 unsigned &TrackSubReg) override { 802 // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. 803 804 // If this is the first call, move to the first argument. 805 if (CurrentSrcIdx == 0) { 806 CurrentSrcIdx = 1; 807 } else { 808 // Otherwise, move to the next argument and check that it is valid. 809 CurrentSrcIdx += 2; 810 if (CurrentSrcIdx >= CopyLike.getNumOperands()) 811 return false; 812 } 813 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); 814 SrcReg = MOInsertedReg.getReg(); 815 // If we have to compose sub-register indices, bails out. 816 if ((SrcSubReg = MOInsertedReg.getSubReg())) 817 return false; 818 819 // We want to track something that is compatible with the related 820 // partial definition. 821 TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); 822 823 const MachineOperand &MODef = CopyLike.getOperand(0); 824 TrackReg = MODef.getReg(); 825 // If we have to compose sub-registers, bails. 826 return MODef.getSubReg() == 0; 827 } 828 829 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 830 // We cannot rewrite out of bound operands. 831 // Moreover, rewritable sources are at odd positions. 832 if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) 833 return false; 834 835 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 836 MO.setReg(NewReg); 837 MO.setSubReg(NewSubReg); 838 return true; 839 } 840 }; 841 } // End namespace. 842 843 /// \brief Get the appropriated CopyRewriter for \p MI. 844 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr 845 /// if no rewriter works for \p MI. 846 static CopyRewriter *getCopyRewriter(MachineInstr &MI, 847 const TargetInstrInfo &TII) { 848 switch (MI.getOpcode()) { 849 default: 850 return nullptr; 851 case TargetOpcode::COPY: 852 return new CopyRewriter(MI); 853 case TargetOpcode::INSERT_SUBREG: 854 return new InsertSubregRewriter(MI); 855 case TargetOpcode::EXTRACT_SUBREG: 856 return new ExtractSubregRewriter(MI, TII); 857 case TargetOpcode::REG_SEQUENCE: 858 return new RegSequenceRewriter(MI); 859 } 860 llvm_unreachable(nullptr); 861 } 862 863 /// \brief Optimize generic copy instructions to avoid cross 864 /// register bank copy. The optimization looks through a chain of 865 /// copies and tries to find a source that has a compatible register 866 /// class. 867 /// Two register classes are considered to be compatible if they share 868 /// the same register bank. 869 /// New copies issued by this optimization are register allocator 870 /// friendly. This optimization does not remove any copy as it may 871 /// overconstraint the register allocator, but replaces some operands 872 /// when possible. 873 /// \pre isCoalescableCopy(*MI) is true. 874 /// \return True, when \p MI has been rewritten. False otherwise. 875 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { 876 assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); 877 assert(MI->getDesc().getNumDefs() == 1 && 878 "Coalescer can understand multiple defs?!"); 879 const MachineOperand &MODef = MI->getOperand(0); 880 // Do not rewrite physical definitions. 881 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 882 return false; 883 884 bool Changed = false; 885 // Get the right rewriter for the current copy. 886 std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII)); 887 // If none exists, bails out. 888 if (!CpyRewriter) 889 return false; 890 // Rewrite each rewritable source. 891 unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; 892 while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, 893 TrackSubReg)) { 894 unsigned NewSrc = TrackReg; 895 unsigned NewSubReg = TrackSubReg; 896 // Try to find a more suitable source. 897 // If we failed to do so, or get the actual source, 898 // move to the next source. 899 if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) 900 continue; 901 // Rewrite source. 902 if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { 903 // We may have extended the live-range of NewSrc, account for that. 904 MRI->clearKillFlags(NewSrc); 905 Changed = true; 906 } 907 } 908 // TODO: We could have a clean-up method to tidy the instruction. 909 // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 910 // => v0 = COPY v1 911 // Currently we haven't seen motivating example for that and we 912 // want to avoid untested code. 913 NumRewrittenCopies += Changed == true; 914 return Changed; 915 } 916 917 /// \brief Optimize copy-like instructions to create 918 /// register coalescer friendly instruction. 919 /// The optimization tries to kill-off the \p MI by looking 920 /// through a chain of copies to find a source that has a compatible 921 /// register class. 922 /// If such a source is found, it replace \p MI by a generic COPY 923 /// operation. 924 /// \pre isUncoalescableCopy(*MI) is true. 925 /// \return True, when \p MI has been optimized. In that case, \p MI has 926 /// been removed from its parent. 927 /// All COPY instructions created, are inserted in \p LocalMIs. 928 bool PeepholeOptimizer::optimizeUncoalescableCopy( 929 MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { 930 assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); 931 932 // Check if we can rewrite all the values defined by this instruction. 933 SmallVector< 934 std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>, 935 4> RewritePairs; 936 for (const MachineOperand &MODef : MI->defs()) { 937 if (MODef.isDead()) 938 // We can ignore those. 939 continue; 940 941 // If a physical register is here, this is probably for a good reason. 942 // Do not rewrite that. 943 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 944 return false; 945 946 // If we do not know how to rewrite this definition, there is no point 947 // in trying to kill this instruction. 948 TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg()); 949 TargetInstrInfo::RegSubRegPair Src = Def; 950 if (!findNextSource(Src.Reg, Src.SubReg)) 951 return false; 952 RewritePairs.push_back(std::make_pair(Def, Src)); 953 } 954 // The change is possible for all defs, do it. 955 for (const auto &PairDefSrc : RewritePairs) { 956 const auto &Def = PairDefSrc.first; 957 const auto &Src = PairDefSrc.second; 958 // Rewrite the "copy" in a way the register coalescer understands. 959 assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && 960 "We do not rewrite physical registers"); 961 const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); 962 unsigned NewVR = MRI->createVirtualRegister(DefRC); 963 MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 964 TII->get(TargetOpcode::COPY), 965 NewVR).addReg(Src.Reg, 0, Src.SubReg); 966 NewCopy->getOperand(0).setSubReg(Def.SubReg); 967 if (Def.SubReg) 968 NewCopy->getOperand(0).setIsUndef(); 969 LocalMIs.insert(NewCopy); 970 MRI->replaceRegWith(Def.Reg, NewVR); 971 MRI->clearKillFlags(NewVR); 972 // We extended the lifetime of Src. 973 // Clear the kill flags to account for that. 974 MRI->clearKillFlags(Src.Reg); 975 } 976 // MI is now dead. 977 MI->eraseFromParent(); 978 ++NumUncoalescableCopies; 979 return true; 980 } 981 982 /// isLoadFoldable - Check whether MI is a candidate for folding into a later 983 /// instruction. We only fold loads to virtual registers and the virtual 984 /// register defined has a single use. 985 bool PeepholeOptimizer::isLoadFoldable( 986 MachineInstr *MI, 987 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { 988 if (!MI->canFoldAsLoad() || !MI->mayLoad()) 989 return false; 990 const MCInstrDesc &MCID = MI->getDesc(); 991 if (MCID.getNumDefs() != 1) 992 return false; 993 994 unsigned Reg = MI->getOperand(0).getReg(); 995 // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting 996 // loads. It should be checked when processing uses of the load, since 997 // uses can be removed during peephole. 998 if (!MI->getOperand(0).getSubReg() && 999 TargetRegisterInfo::isVirtualRegister(Reg) && 1000 MRI->hasOneNonDBGUse(Reg)) { 1001 FoldAsLoadDefCandidates.insert(Reg); 1002 return true; 1003 } 1004 return false; 1005 } 1006 1007 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, 1008 SmallSet<unsigned, 4> &ImmDefRegs, 1009 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 1010 const MCInstrDesc &MCID = MI->getDesc(); 1011 if (!MI->isMoveImmediate()) 1012 return false; 1013 if (MCID.getNumDefs() != 1) 1014 return false; 1015 unsigned Reg = MI->getOperand(0).getReg(); 1016 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 1017 ImmDefMIs.insert(std::make_pair(Reg, MI)); 1018 ImmDefRegs.insert(Reg); 1019 return true; 1020 } 1021 1022 return false; 1023 } 1024 1025 /// foldImmediate - Try folding register operands that are defined by move 1026 /// immediate instructions, i.e. a trivial constant folding optimization, if 1027 /// and only if the def and use are in the same BB. 1028 bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 1029 SmallSet<unsigned, 4> &ImmDefRegs, 1030 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 1031 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 1032 MachineOperand &MO = MI->getOperand(i); 1033 if (!MO.isReg() || MO.isDef()) 1034 continue; 1035 unsigned Reg = MO.getReg(); 1036 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1037 continue; 1038 if (ImmDefRegs.count(Reg) == 0) 1039 continue; 1040 DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); 1041 assert(II != ImmDefMIs.end()); 1042 if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { 1043 ++NumImmFold; 1044 return true; 1045 } 1046 } 1047 return false; 1048 } 1049 1050 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { 1051 if (skipOptnoneFunction(*MF.getFunction())) 1052 return false; 1053 1054 DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); 1055 DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); 1056 1057 if (DisablePeephole) 1058 return false; 1059 1060 TM = &MF.getTarget(); 1061 TII = TM->getSubtargetImpl()->getInstrInfo(); 1062 MRI = &MF.getRegInfo(); 1063 DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr; 1064 1065 bool Changed = false; 1066 1067 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 1068 MachineBasicBlock *MBB = &*I; 1069 1070 bool SeenMoveImm = false; 1071 SmallPtrSet<MachineInstr*, 16> LocalMIs; 1072 SmallSet<unsigned, 4> ImmDefRegs; 1073 DenseMap<unsigned, MachineInstr*> ImmDefMIs; 1074 SmallSet<unsigned, 16> FoldAsLoadDefCandidates; 1075 1076 for (MachineBasicBlock::iterator 1077 MII = I->begin(), MIE = I->end(); MII != MIE; ) { 1078 MachineInstr *MI = &*MII; 1079 // We may be erasing MI below, increment MII now. 1080 ++MII; 1081 LocalMIs.insert(MI); 1082 1083 // Skip debug values. They should not affect this peephole optimization. 1084 if (MI->isDebugValue()) 1085 continue; 1086 1087 // If there exists an instruction which belongs to the following 1088 // categories, we will discard the load candidates. 1089 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || 1090 MI->isKill() || MI->isInlineAsm() || 1091 MI->hasUnmodeledSideEffects()) { 1092 FoldAsLoadDefCandidates.clear(); 1093 continue; 1094 } 1095 if (MI->mayStore() || MI->isCall()) 1096 FoldAsLoadDefCandidates.clear(); 1097 1098 if ((isUncoalescableCopy(*MI) && 1099 optimizeUncoalescableCopy(MI, LocalMIs)) || 1100 (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || 1101 (MI->isSelect() && optimizeSelect(MI))) { 1102 // MI is deleted. 1103 LocalMIs.erase(MI); 1104 Changed = true; 1105 continue; 1106 } 1107 1108 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { 1109 // MI is just rewritten. 1110 Changed = true; 1111 continue; 1112 } 1113 1114 if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { 1115 SeenMoveImm = true; 1116 } else { 1117 Changed |= optimizeExtInstr(MI, MBB, LocalMIs); 1118 // optimizeExtInstr might have created new instructions after MI 1119 // and before the already incremented MII. Adjust MII so that the 1120 // next iteration sees the new instructions. 1121 MII = MI; 1122 ++MII; 1123 if (SeenMoveImm) 1124 Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); 1125 } 1126 1127 // Check whether MI is a load candidate for folding into a later 1128 // instruction. If MI is not a candidate, check whether we can fold an 1129 // earlier load into MI. 1130 if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && 1131 !FoldAsLoadDefCandidates.empty()) { 1132 const MCInstrDesc &MIDesc = MI->getDesc(); 1133 for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands(); 1134 ++i) { 1135 const MachineOperand &MOp = MI->getOperand(i); 1136 if (!MOp.isReg()) 1137 continue; 1138 unsigned FoldAsLoadDefReg = MOp.getReg(); 1139 if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) { 1140 // We need to fold load after optimizeCmpInstr, since 1141 // optimizeCmpInstr can enable folding by converting SUB to CMP. 1142 // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and 1143 // we need it for markUsesInDebugValueAsUndef(). 1144 unsigned FoldedReg = FoldAsLoadDefReg; 1145 MachineInstr *DefMI = nullptr; 1146 MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, 1147 FoldAsLoadDefReg, 1148 DefMI); 1149 if (FoldMI) { 1150 // Update LocalMIs since we replaced MI with FoldMI and deleted 1151 // DefMI. 1152 DEBUG(dbgs() << "Replacing: " << *MI); 1153 DEBUG(dbgs() << " With: " << *FoldMI); 1154 LocalMIs.erase(MI); 1155 LocalMIs.erase(DefMI); 1156 LocalMIs.insert(FoldMI); 1157 MI->eraseFromParent(); 1158 DefMI->eraseFromParent(); 1159 MRI->markUsesInDebugValueAsUndef(FoldedReg); 1160 FoldAsLoadDefCandidates.erase(FoldedReg); 1161 ++NumLoadFold; 1162 // MI is replaced with FoldMI. 1163 Changed = true; 1164 break; 1165 } 1166 } 1167 } 1168 } 1169 } 1170 } 1171 1172 return Changed; 1173 } 1174 1175 bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, 1176 unsigned &SrcSubReg) { 1177 assert(Def->isCopy() && "Invalid definition"); 1178 // Copy instruction are supposed to be: Def = Src. 1179 // If someone breaks this assumption, bad things will happen everywhere. 1180 assert(Def->getNumOperands() == 2 && "Invalid number of operands"); 1181 1182 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 1183 // If we look for a different subreg, it means we want a subreg of src. 1184 // Bails as we do not support composing subreg yet. 1185 return false; 1186 // Otherwise, we want the whole source. 1187 const MachineOperand &Src = Def->getOperand(1); 1188 SrcReg = Src.getReg(); 1189 SrcSubReg = Src.getSubReg(); 1190 return true; 1191 } 1192 1193 bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, 1194 unsigned &SrcSubReg) { 1195 assert(Def->isBitcast() && "Invalid definition"); 1196 1197 // Bail if there are effects that a plain copy will not expose. 1198 if (Def->hasUnmodeledSideEffects()) 1199 return false; 1200 1201 // Bitcasts with more than one def are not supported. 1202 if (Def->getDesc().getNumDefs() != 1) 1203 return false; 1204 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 1205 // If we look for a different subreg, it means we want a subreg of the src. 1206 // Bails as we do not support composing subreg yet. 1207 return false; 1208 1209 unsigned SrcIdx = Def->getNumOperands(); 1210 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; 1211 ++OpIdx) { 1212 const MachineOperand &MO = Def->getOperand(OpIdx); 1213 if (!MO.isReg() || !MO.getReg()) 1214 continue; 1215 assert(!MO.isDef() && "We should have skipped all the definitions by now"); 1216 if (SrcIdx != EndOpIdx) 1217 // Multiple sources? 1218 return false; 1219 SrcIdx = OpIdx; 1220 } 1221 const MachineOperand &Src = Def->getOperand(SrcIdx); 1222 SrcReg = Src.getReg(); 1223 SrcSubReg = Src.getSubReg(); 1224 return true; 1225 } 1226 1227 bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, 1228 unsigned &SrcSubReg) { 1229 assert((Def->isRegSequence() || Def->isRegSequenceLike()) && 1230 "Invalid definition"); 1231 1232 if (Def->getOperand(DefIdx).getSubReg()) 1233 // If we are composing subreg, bails out. 1234 // The case we are checking is Def.<subreg> = REG_SEQUENCE. 1235 // This should almost never happen as the SSA property is tracked at 1236 // the register level (as opposed to the subreg level). 1237 // I.e., 1238 // Def.sub0 = 1239 // Def.sub1 = 1240 // is a valid SSA representation for Def.sub0 and Def.sub1, but not for 1241 // Def. Thus, it must not be generated. 1242 // However, some code could theoretically generates a single 1243 // Def.sub0 (i.e, not defining the other subregs) and we would 1244 // have this case. 1245 // If we can ascertain (or force) that this never happens, we could 1246 // turn that into an assertion. 1247 return false; 1248 1249 if (!TII) 1250 // We could handle the REG_SEQUENCE here, but we do not want to 1251 // duplicate the code from the generic TII. 1252 return false; 1253 1254 SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs; 1255 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) 1256 return false; 1257 1258 // We are looking at: 1259 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... 1260 // Check if one of the operand defines the subreg we are interested in. 1261 for (auto &RegSeqInput : RegSeqInputRegs) { 1262 if (RegSeqInput.SubIdx == DefSubReg) { 1263 if (RegSeqInput.SubReg) 1264 // Bails if we have to compose sub registers. 1265 return false; 1266 1267 SrcReg = RegSeqInput.Reg; 1268 SrcSubReg = RegSeqInput.SubReg; 1269 return true; 1270 } 1271 } 1272 1273 // If the subreg we are tracking is super-defined by another subreg, 1274 // we could follow this value. However, this would require to compose 1275 // the subreg and we do not do that for now. 1276 return false; 1277 } 1278 1279 bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, 1280 unsigned &SrcSubReg) { 1281 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && 1282 "Invalid definition"); 1283 1284 if (Def->getOperand(DefIdx).getSubReg()) 1285 // If we are composing subreg, bails out. 1286 // Same remark as getNextSourceFromRegSequence. 1287 // I.e., this may be turned into an assert. 1288 return false; 1289 1290 if (!TII) 1291 // We could handle the REG_SEQUENCE here, but we do not want to 1292 // duplicate the code from the generic TII. 1293 return false; 1294 1295 TargetInstrInfo::RegSubRegPair BaseReg; 1296 TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; 1297 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) 1298 return false; 1299 1300 // We are looking at: 1301 // Def = INSERT_SUBREG v0, v1, sub1 1302 // There are two cases: 1303 // 1. DefSubReg == sub1, get v1. 1304 // 2. DefSubReg != sub1, the value may be available through v0. 1305 1306 // #1 Check if the inserted register matches the required sub index. 1307 if (InsertedReg.SubIdx == DefSubReg) { 1308 SrcReg = InsertedReg.Reg; 1309 SrcSubReg = InsertedReg.SubReg; 1310 return true; 1311 } 1312 // #2 Otherwise, if the sub register we are looking for is not partial 1313 // defined by the inserted element, we can look through the main 1314 // register (v0). 1315 const MachineOperand &MODef = Def->getOperand(DefIdx); 1316 // If the result register (Def) and the base register (v0) do not 1317 // have the same register class or if we have to compose 1318 // subregisters, bails out. 1319 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || 1320 BaseReg.SubReg) 1321 return false; 1322 1323 // Get the TRI and check if the inserted sub-register overlaps with the 1324 // sub-register we are tracking. 1325 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); 1326 if (!TRI || 1327 (TRI->getSubRegIndexLaneMask(DefSubReg) & 1328 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) 1329 return false; 1330 // At this point, the value is available in v0 via the same subreg 1331 // we used for Def. 1332 SrcReg = BaseReg.Reg; 1333 SrcSubReg = DefSubReg; 1334 return true; 1335 } 1336 1337 bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg, 1338 unsigned &SrcSubReg) { 1339 assert((Def->isExtractSubreg() || 1340 Def->isExtractSubregLike()) && "Invalid definition"); 1341 // We are looking at: 1342 // Def = EXTRACT_SUBREG v0, sub0 1343 1344 // Bails if we have to compose sub registers. 1345 // Indeed, if DefSubReg != 0, we would have to compose it with sub0. 1346 if (DefSubReg) 1347 return false; 1348 1349 if (!TII) 1350 // We could handle the EXTRACT_SUBREG here, but we do not want to 1351 // duplicate the code from the generic TII. 1352 return false; 1353 1354 TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; 1355 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) 1356 return false; 1357 1358 // Bails if we have to compose sub registers. 1359 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. 1360 if (ExtractSubregInputReg.SubReg) 1361 return false; 1362 // Otherwise, the value is available in the v0.sub0. 1363 SrcReg = ExtractSubregInputReg.Reg; 1364 SrcSubReg = ExtractSubregInputReg.SubIdx; 1365 return true; 1366 } 1367 1368 bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg, 1369 unsigned &SrcSubReg) { 1370 assert(Def->isSubregToReg() && "Invalid definition"); 1371 // We are looking at: 1372 // Def = SUBREG_TO_REG Imm, v0, sub0 1373 1374 // Bails if we have to compose sub registers. 1375 // If DefSubReg != sub0, we would have to check that all the bits 1376 // we track are included in sub0 and if yes, we would have to 1377 // determine the right subreg in v0. 1378 if (DefSubReg != Def->getOperand(3).getImm()) 1379 return false; 1380 // Bails if we have to compose sub registers. 1381 // Likewise, if v0.subreg != 0, we would have to compose it with sub0. 1382 if (Def->getOperand(2).getSubReg()) 1383 return false; 1384 1385 SrcReg = Def->getOperand(2).getReg(); 1386 SrcSubReg = Def->getOperand(3).getImm(); 1387 return true; 1388 } 1389 1390 bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) { 1391 assert(Def && "This method needs a valid definition"); 1392 1393 assert( 1394 (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && 1395 Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); 1396 if (Def->isCopy()) 1397 return getNextSourceFromCopy(SrcReg, SrcSubReg); 1398 if (Def->isBitcast()) 1399 return getNextSourceFromBitcast(SrcReg, SrcSubReg); 1400 // All the remaining cases involve "complex" instructions. 1401 // Bails if we did not ask for the advanced tracking. 1402 if (!UseAdvancedTracking) 1403 return false; 1404 if (Def->isRegSequence() || Def->isRegSequenceLike()) 1405 return getNextSourceFromRegSequence(SrcReg, SrcSubReg); 1406 if (Def->isInsertSubreg() || Def->isInsertSubregLike()) 1407 return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg); 1408 if (Def->isExtractSubreg() || Def->isExtractSubregLike()) 1409 return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg); 1410 if (Def->isSubregToReg()) 1411 return getNextSourceFromSubregToReg(SrcReg, SrcSubReg); 1412 return false; 1413 } 1414 1415 const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg, 1416 unsigned &SrcSubReg) { 1417 // If we reach a point where we cannot move up in the use-def chain, 1418 // there is nothing we can get. 1419 if (!Def) 1420 return nullptr; 1421 1422 const MachineInstr *PrevDef = nullptr; 1423 // Try to find the next source. 1424 if (getNextSourceImpl(SrcReg, SrcSubReg)) { 1425 // Update definition, definition index, and subregister for the 1426 // next call of getNextSource. 1427 // Update the current register. 1428 Reg = SrcReg; 1429 // Update the return value before moving up in the use-def chain. 1430 PrevDef = Def; 1431 // If we can still move up in the use-def chain, move to the next 1432 // defintion. 1433 if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { 1434 Def = MRI.getVRegDef(Reg); 1435 DefIdx = MRI.def_begin(Reg).getOperandNo(); 1436 DefSubReg = SrcSubReg; 1437 return PrevDef; 1438 } 1439 } 1440 // If we end up here, this means we will not be able to find another source 1441 // for the next iteration. 1442 // Make sure any new call to getNextSource bails out early by cutting the 1443 // use-def chain. 1444 Def = nullptr; 1445 return PrevDef; 1446 } 1447