1 //===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Perform peephole optimizations on the machine code: 11 // 12 // - Optimize Extensions 13 // 14 // Optimization of sign / zero extension instructions. It may be extended to 15 // handle other instructions with similar properties. 16 // 17 // On some targets, some instructions, e.g. X86 sign / zero extension, may 18 // leave the source value in the lower part of the result. This optimization 19 // will replace some uses of the pre-extension value with uses of the 20 // sub-register of the results. 21 // 22 // - Optimize Comparisons 23 // 24 // Optimization of comparison instructions. For instance, in this code: 25 // 26 // sub r1, 1 27 // cmp r1, 0 28 // bz L1 29 // 30 // If the "sub" instruction all ready sets (or could be modified to set) the 31 // same flag that the "cmp" instruction sets and that "bz" uses, then we can 32 // eliminate the "cmp" instruction. 33 // 34 // Another instance, in this code: 35 // 36 // sub r1, r3 | sub r1, imm 37 // cmp r3, r1 or cmp r1, r3 | cmp r1, imm 38 // bge L1 39 // 40 // If the branch instruction can use flag from "sub", then we can replace 41 // "sub" with "subs" and eliminate the "cmp" instruction. 42 // 43 // - Optimize Loads: 44 // 45 // Loads that can be folded into a later instruction. A load is foldable 46 // if it loads to virtual registers and the virtual register defined has 47 // a single use. 48 // 49 // - Optimize Copies and Bitcast (more generally, target specific copies): 50 // 51 // Rewrite copies and bitcasts to avoid cross register bank copies 52 // when possible. 53 // E.g., Consider the following example, where capital and lower 54 // letters denote different register file: 55 // b = copy A <-- cross-bank copy 56 // C = copy b <-- cross-bank copy 57 // => 58 // b = copy A <-- cross-bank copy 59 // C = copy A <-- same-bank copy 60 // 61 // E.g., for bitcast: 62 // b = bitcast A <-- cross-bank copy 63 // C = bitcast b <-- cross-bank copy 64 // => 65 // b = bitcast A <-- cross-bank copy 66 // C = copy A <-- same-bank copy 67 //===----------------------------------------------------------------------===// 68 69 #include "llvm/CodeGen/Passes.h" 70 #include "llvm/ADT/DenseMap.h" 71 #include "llvm/ADT/SmallPtrSet.h" 72 #include "llvm/ADT/SmallSet.h" 73 #include "llvm/ADT/Statistic.h" 74 #include "llvm/CodeGen/MachineDominators.h" 75 #include "llvm/CodeGen/MachineInstrBuilder.h" 76 #include "llvm/CodeGen/MachineRegisterInfo.h" 77 #include "llvm/Support/CommandLine.h" 78 #include "llvm/Support/Debug.h" 79 #include "llvm/Target/TargetInstrInfo.h" 80 #include "llvm/Target/TargetRegisterInfo.h" 81 #include "llvm/Target/TargetSubtargetInfo.h" 82 #include <utility> 83 using namespace llvm; 84 85 #define DEBUG_TYPE "peephole-opt" 86 87 // Optimize Extensions 88 static cl::opt<bool> 89 Aggressive("aggressive-ext-opt", cl::Hidden, 90 cl::desc("Aggressive extension optimization")); 91 92 static cl::opt<bool> 93 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), 94 cl::desc("Disable the peephole optimizer")); 95 96 static cl::opt<bool> 97 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), 98 cl::desc("Disable advanced copy optimization")); 99 100 STATISTIC(NumReuse, "Number of extension results reused"); 101 STATISTIC(NumCmps, "Number of compares eliminated"); 102 STATISTIC(NumImmFold, "Number of move immediate folded"); 103 STATISTIC(NumLoadFold, "Number of loads folded"); 104 STATISTIC(NumSelects, "Number of selects optimized"); 105 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); 106 STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); 107 108 namespace { 109 class PeepholeOptimizer : public MachineFunctionPass { 110 const TargetInstrInfo *TII; 111 const TargetRegisterInfo *TRI; 112 MachineRegisterInfo *MRI; 113 MachineDominatorTree *DT; // Machine dominator tree 114 115 public: 116 static char ID; // Pass identification 117 PeepholeOptimizer() : MachineFunctionPass(ID) { 118 initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); 119 } 120 121 bool runOnMachineFunction(MachineFunction &MF) override; 122 123 void getAnalysisUsage(AnalysisUsage &AU) const override { 124 AU.setPreservesCFG(); 125 MachineFunctionPass::getAnalysisUsage(AU); 126 if (Aggressive) { 127 AU.addRequired<MachineDominatorTree>(); 128 AU.addPreserved<MachineDominatorTree>(); 129 } 130 } 131 132 private: 133 bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); 134 bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 135 SmallPtrSetImpl<MachineInstr*> &LocalMIs); 136 bool optimizeSelect(MachineInstr *MI); 137 bool optimizeCondBranch(MachineInstr *MI); 138 bool optimizeCopyOrBitcast(MachineInstr *MI); 139 bool optimizeCoalescableCopy(MachineInstr *MI); 140 bool optimizeUncoalescableCopy(MachineInstr *MI, 141 SmallPtrSetImpl<MachineInstr *> &LocalMIs); 142 bool findNextSource(unsigned &Reg, unsigned &SubReg); 143 bool isMoveImmediate(MachineInstr *MI, 144 SmallSet<unsigned, 4> &ImmDefRegs, 145 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 146 bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 147 SmallSet<unsigned, 4> &ImmDefRegs, 148 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 149 bool isLoadFoldable(MachineInstr *MI, 150 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); 151 152 /// \brief Check whether \p MI is understood by the register coalescer 153 /// but may require some rewriting. 154 bool isCoalescableCopy(const MachineInstr &MI) { 155 // SubregToRegs are not interesting, because they are already register 156 // coalescer friendly. 157 return MI.isCopy() || (!DisableAdvCopyOpt && 158 (MI.isRegSequence() || MI.isInsertSubreg() || 159 MI.isExtractSubreg())); 160 } 161 162 /// \brief Check whether \p MI is a copy like instruction that is 163 /// not recognized by the register coalescer. 164 bool isUncoalescableCopy(const MachineInstr &MI) { 165 return MI.isBitcast() || 166 (!DisableAdvCopyOpt && 167 (MI.isRegSequenceLike() || MI.isInsertSubregLike() || 168 MI.isExtractSubregLike())); 169 } 170 }; 171 172 /// \brief Helper class to track the possible sources of a value defined by 173 /// a (chain of) copy related instructions. 174 /// Given a definition (instruction and definition index), this class 175 /// follows the use-def chain to find successive suitable sources. 176 /// The given source can be used to rewrite the definition into 177 /// def = COPY src. 178 /// 179 /// For instance, let us consider the following snippet: 180 /// v0 = 181 /// v2 = INSERT_SUBREG v1, v0, sub0 182 /// def = COPY v2.sub0 183 /// 184 /// Using a ValueTracker for def = COPY v2.sub0 will give the following 185 /// suitable sources: 186 /// v2.sub0 and v0. 187 /// Then, def can be rewritten into def = COPY v0. 188 class ValueTracker { 189 private: 190 /// The current point into the use-def chain. 191 const MachineInstr *Def; 192 /// The index of the definition in Def. 193 unsigned DefIdx; 194 /// The sub register index of the definition. 195 unsigned DefSubReg; 196 /// The register where the value can be found. 197 unsigned Reg; 198 /// Specifiy whether or not the value tracking looks through 199 /// complex instructions. When this is false, the value tracker 200 /// bails on everything that is not a copy or a bitcast. 201 /// 202 /// Note: This could have been implemented as a specialized version of 203 /// the ValueTracker class but that would have complicated the code of 204 /// the users of this class. 205 bool UseAdvancedTracking; 206 /// MachineRegisterInfo used to perform tracking. 207 const MachineRegisterInfo &MRI; 208 /// Optional TargetInstrInfo used to perform some complex 209 /// tracking. 210 const TargetInstrInfo *TII; 211 212 /// \brief Dispatcher to the right underlying implementation of 213 /// getNextSource. 214 bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg); 215 /// \brief Specialized version of getNextSource for Copy instructions. 216 bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg); 217 /// \brief Specialized version of getNextSource for Bitcast instructions. 218 bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg); 219 /// \brief Specialized version of getNextSource for RegSequence 220 /// instructions. 221 bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg); 222 /// \brief Specialized version of getNextSource for InsertSubreg 223 /// instructions. 224 bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg); 225 /// \brief Specialized version of getNextSource for ExtractSubreg 226 /// instructions. 227 bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg); 228 /// \brief Specialized version of getNextSource for SubregToReg 229 /// instructions. 230 bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg); 231 232 public: 233 /// \brief Create a ValueTracker instance for the value defined by \p Reg. 234 /// \p DefSubReg represents the sub register index the value tracker will 235 /// track. It does not need to match the sub register index used in the 236 /// definition of \p Reg. 237 /// \p UseAdvancedTracking specifies whether or not the value tracker looks 238 /// through complex instructions. By default (false), it handles only copy 239 /// and bitcast instructions. 240 /// If \p Reg is a physical register, a value tracker constructed with 241 /// this constructor will not find any alternative source. 242 /// Indeed, when \p Reg is a physical register that constructor does not 243 /// know which definition of \p Reg it should track. 244 /// Use the next constructor to track a physical register. 245 ValueTracker(unsigned Reg, unsigned DefSubReg, 246 const MachineRegisterInfo &MRI, 247 bool UseAdvancedTracking = false, 248 const TargetInstrInfo *TII = nullptr) 249 : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg), 250 UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { 251 if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { 252 Def = MRI.getVRegDef(Reg); 253 DefIdx = MRI.def_begin(Reg).getOperandNo(); 254 } 255 } 256 257 /// \brief Create a ValueTracker instance for the value defined by 258 /// the pair \p MI, \p DefIdx. 259 /// Unlike the other constructor, the value tracker produced by this one 260 /// may be able to find a new source when the definition is a physical 261 /// register. 262 /// This could be useful to rewrite target specific instructions into 263 /// generic copy instructions. 264 ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, 265 const MachineRegisterInfo &MRI, 266 bool UseAdvancedTracking = false, 267 const TargetInstrInfo *TII = nullptr) 268 : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), 269 UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { 270 assert(DefIdx < Def->getDesc().getNumDefs() && 271 Def->getOperand(DefIdx).isReg() && "Invalid definition"); 272 Reg = Def->getOperand(DefIdx).getReg(); 273 } 274 275 /// \brief Following the use-def chain, get the next available source 276 /// for the tracked value. 277 /// When the returned value is not nullptr, \p SrcReg gives the register 278 /// that contain the tracked value. 279 /// \note The sub register index returned in \p SrcSubReg must be used 280 /// on \p SrcReg to access the actual value. 281 /// \return Unless the returned value is nullptr (i.e., no source found), 282 /// \p SrcReg gives the register of the next source used in the returned 283 /// instruction and \p SrcSubReg the sub-register index to be used on that 284 /// source to get the tracked value. When nullptr is returned, no 285 /// alternative source has been found. 286 const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg); 287 288 /// \brief Get the last register where the initial value can be found. 289 /// Initially this is the register of the definition. 290 /// Then, after each successful call to getNextSource, this is the 291 /// register of the last source. 292 unsigned getReg() const { return Reg; } 293 }; 294 } 295 296 char PeepholeOptimizer::ID = 0; 297 char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; 298 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", 299 "Peephole Optimizations", false, false) 300 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 301 INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", 302 "Peephole Optimizations", false, false) 303 304 /// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads 305 /// a single register and writes a single register and it does not modify the 306 /// source, and if the source value is preserved as a sub-register of the 307 /// result, then replace all reachable uses of the source with the subreg of the 308 /// result. 309 /// 310 /// Do not generate an EXTRACT that is used only in a debug use, as this changes 311 /// the code. Since this code does not currently share EXTRACTs, just ignore all 312 /// debug uses. 313 bool PeepholeOptimizer:: 314 optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 315 SmallPtrSetImpl<MachineInstr*> &LocalMIs) { 316 unsigned SrcReg, DstReg, SubIdx; 317 if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) 318 return false; 319 320 if (TargetRegisterInfo::isPhysicalRegister(DstReg) || 321 TargetRegisterInfo::isPhysicalRegister(SrcReg)) 322 return false; 323 324 if (MRI->hasOneNonDBGUse(SrcReg)) 325 // No other uses. 326 return false; 327 328 // Ensure DstReg can get a register class that actually supports 329 // sub-registers. Don't change the class until we commit. 330 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); 331 DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx); 332 if (!DstRC) 333 return false; 334 335 // The ext instr may be operating on a sub-register of SrcReg as well. 336 // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit 337 // register. 338 // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of 339 // SrcReg:SubIdx should be replaced. 340 bool UseSrcSubIdx = 341 TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; 342 343 // The source has other uses. See if we can replace the other uses with use of 344 // the result of the extension. 345 SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; 346 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 347 ReachedBBs.insert(UI.getParent()); 348 349 // Uses that are in the same BB of uses of the result of the instruction. 350 SmallVector<MachineOperand*, 8> Uses; 351 352 // Uses that the result of the instruction can reach. 353 SmallVector<MachineOperand*, 8> ExtendedUses; 354 355 bool ExtendLife = true; 356 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { 357 MachineInstr *UseMI = UseMO.getParent(); 358 if (UseMI == MI) 359 continue; 360 361 if (UseMI->isPHI()) { 362 ExtendLife = false; 363 continue; 364 } 365 366 // Only accept uses of SrcReg:SubIdx. 367 if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) 368 continue; 369 370 // It's an error to translate this: 371 // 372 // %reg1025 = <sext> %reg1024 373 // ... 374 // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 375 // 376 // into this: 377 // 378 // %reg1025 = <sext> %reg1024 379 // ... 380 // %reg1027 = COPY %reg1025:4 381 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 382 // 383 // The problem here is that SUBREG_TO_REG is there to assert that an 384 // implicit zext occurs. It doesn't insert a zext instruction. If we allow 385 // the COPY here, it will give us the value after the <sext>, not the 386 // original value of %reg1024 before <sext>. 387 if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) 388 continue; 389 390 MachineBasicBlock *UseMBB = UseMI->getParent(); 391 if (UseMBB == MBB) { 392 // Local uses that come after the extension. 393 if (!LocalMIs.count(UseMI)) 394 Uses.push_back(&UseMO); 395 } else if (ReachedBBs.count(UseMBB)) { 396 // Non-local uses where the result of the extension is used. Always 397 // replace these unless it's a PHI. 398 Uses.push_back(&UseMO); 399 } else if (Aggressive && DT->dominates(MBB, UseMBB)) { 400 // We may want to extend the live range of the extension result in order 401 // to replace these uses. 402 ExtendedUses.push_back(&UseMO); 403 } else { 404 // Both will be live out of the def MBB anyway. Don't extend live range of 405 // the extension result. 406 ExtendLife = false; 407 break; 408 } 409 } 410 411 if (ExtendLife && !ExtendedUses.empty()) 412 // Extend the liveness of the extension result. 413 std::copy(ExtendedUses.begin(), ExtendedUses.end(), 414 std::back_inserter(Uses)); 415 416 // Now replace all uses. 417 bool Changed = false; 418 if (!Uses.empty()) { 419 SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; 420 421 // Look for PHI uses of the extended result, we don't want to extend the 422 // liveness of a PHI input. It breaks all kinds of assumptions down 423 // stream. A PHI use is expected to be the kill of its source values. 424 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 425 if (UI.isPHI()) 426 PHIBBs.insert(UI.getParent()); 427 428 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 429 for (unsigned i = 0, e = Uses.size(); i != e; ++i) { 430 MachineOperand *UseMO = Uses[i]; 431 MachineInstr *UseMI = UseMO->getParent(); 432 MachineBasicBlock *UseMBB = UseMI->getParent(); 433 if (PHIBBs.count(UseMBB)) 434 continue; 435 436 // About to add uses of DstReg, clear DstReg's kill flags. 437 if (!Changed) { 438 MRI->clearKillFlags(DstReg); 439 MRI->constrainRegClass(DstReg, DstRC); 440 } 441 442 unsigned NewVR = MRI->createVirtualRegister(RC); 443 MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), 444 TII->get(TargetOpcode::COPY), NewVR) 445 .addReg(DstReg, 0, SubIdx); 446 // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. 447 if (UseSrcSubIdx) { 448 Copy->getOperand(0).setSubReg(SubIdx); 449 Copy->getOperand(0).setIsUndef(); 450 } 451 UseMO->setReg(NewVR); 452 ++NumReuse; 453 Changed = true; 454 } 455 } 456 457 return Changed; 458 } 459 460 /// optimizeCmpInstr - If the instruction is a compare and the previous 461 /// instruction it's comparing against all ready sets (or could be modified to 462 /// set) the same flag as the compare, then we can remove the comparison and use 463 /// the flag from the previous instruction. 464 bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, 465 MachineBasicBlock *MBB) { 466 // If this instruction is a comparison against zero and isn't comparing a 467 // physical register, we can try to optimize it. 468 unsigned SrcReg, SrcReg2; 469 int CmpMask, CmpValue; 470 if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || 471 TargetRegisterInfo::isPhysicalRegister(SrcReg) || 472 (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) 473 return false; 474 475 // Attempt to optimize the comparison instruction. 476 if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { 477 ++NumCmps; 478 return true; 479 } 480 481 return false; 482 } 483 484 /// Optimize a select instruction. 485 bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { 486 unsigned TrueOp = 0; 487 unsigned FalseOp = 0; 488 bool Optimizable = false; 489 SmallVector<MachineOperand, 4> Cond; 490 if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) 491 return false; 492 if (!Optimizable) 493 return false; 494 if (!TII->optimizeSelect(MI)) 495 return false; 496 MI->eraseFromParent(); 497 ++NumSelects; 498 return true; 499 } 500 501 /// \brief Check if a simpler conditional branch can be 502 // generated 503 bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { 504 return TII->optimizeCondBranch(MI); 505 } 506 507 /// \brief Check if the registers defined by the pair (RegisterClass, SubReg) 508 /// share the same register file. 509 static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, 510 const TargetRegisterClass *DefRC, 511 unsigned DefSubReg, 512 const TargetRegisterClass *SrcRC, 513 unsigned SrcSubReg) { 514 // Same register class. 515 if (DefRC == SrcRC) 516 return true; 517 518 // Both operands are sub registers. Check if they share a register class. 519 unsigned SrcIdx, DefIdx; 520 if (SrcSubReg && DefSubReg) 521 return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, 522 SrcIdx, DefIdx) != nullptr; 523 // At most one of the register is a sub register, make it Src to avoid 524 // duplicating the test. 525 if (!SrcSubReg) { 526 std::swap(DefSubReg, SrcSubReg); 527 std::swap(DefRC, SrcRC); 528 } 529 530 // One of the register is a sub register, check if we can get a superclass. 531 if (SrcSubReg) 532 return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; 533 // Plain copy. 534 return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; 535 } 536 537 /// \brief Try to find the next source that share the same register file 538 /// for the value defined by \p Reg and \p SubReg. 539 /// When true is returned, \p Reg and \p SubReg are updated with the 540 /// register number and sub-register index of the new source. 541 /// \return False if no alternative sources are available. True otherwise. 542 bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { 543 // Do not try to find a new source for a physical register. 544 // So far we do not have any motivating example for doing that. 545 // Thus, instead of maintaining untested code, we will revisit that if 546 // that changes at some point. 547 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 548 return false; 549 550 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); 551 unsigned DefSubReg = SubReg; 552 553 unsigned Src; 554 unsigned SrcSubReg; 555 bool ShouldRewrite = false; 556 557 // Follow the chain of copies until we reach the top of the use-def chain 558 // or find a more suitable source. 559 ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); 560 do { 561 unsigned CopySrcReg, CopySrcSubReg; 562 if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg)) 563 break; 564 Src = CopySrcReg; 565 SrcSubReg = CopySrcSubReg; 566 567 // Do not extend the live-ranges of physical registers as they add 568 // constraints to the register allocator. 569 // Moreover, if we want to extend the live-range of a physical register, 570 // unlike SSA virtual register, we will have to check that they are not 571 // redefine before the related use. 572 if (TargetRegisterInfo::isPhysicalRegister(Src)) 573 break; 574 575 const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); 576 577 // If this source does not incur a cross register bank copy, use it. 578 ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, 579 SrcSubReg); 580 } while (!ShouldRewrite); 581 582 // If we did not find a more suitable source, there is nothing to optimize. 583 if (!ShouldRewrite || Src == Reg) 584 return false; 585 586 Reg = Src; 587 SubReg = SrcSubReg; 588 return true; 589 } 590 591 namespace { 592 /// \brief Helper class to rewrite the arguments of a copy-like instruction. 593 class CopyRewriter { 594 protected: 595 /// The copy-like instruction. 596 MachineInstr &CopyLike; 597 /// The index of the source being rewritten. 598 unsigned CurrentSrcIdx; 599 600 public: 601 CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {} 602 603 virtual ~CopyRewriter() {} 604 605 /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and 606 /// the related value that it affects (TrackReg, TrackSubReg). 607 /// A source is considered rewritable if its register class and the 608 /// register class of the related TrackReg may not be register 609 /// coalescer friendly. In other words, given a copy-like instruction 610 /// not all the arguments may be returned at rewritable source, since 611 /// some arguments are none to be register coalescer friendly. 612 /// 613 /// Each call of this method moves the current source to the next 614 /// rewritable source. 615 /// For instance, let CopyLike be the instruction to rewrite. 616 /// CopyLike has one definition and one source: 617 /// dst.dstSubIdx = CopyLike src.srcSubIdx. 618 /// 619 /// The first call will give the first rewritable source, i.e., 620 /// the only source this instruction has: 621 /// (SrcReg, SrcSubReg) = (src, srcSubIdx). 622 /// This source defines the whole definition, i.e., 623 /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). 624 /// 625 /// The second and subsequent calls will return false, has there is only one 626 /// rewritable source. 627 /// 628 /// \return True if a rewritable source has been found, false otherwise. 629 /// The output arguments are valid if and only if true is returned. 630 virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 631 unsigned &TrackReg, 632 unsigned &TrackSubReg) { 633 // If CurrentSrcIdx == 1, this means this function has already been 634 // called once. CopyLike has one defintiion and one argument, thus, 635 // there is nothing else to rewrite. 636 if (!CopyLike.isCopy() || CurrentSrcIdx == 1) 637 return false; 638 // This is the first call to getNextRewritableSource. 639 // Move the CurrentSrcIdx to remember that we made that call. 640 CurrentSrcIdx = 1; 641 // The rewritable source is the argument. 642 const MachineOperand &MOSrc = CopyLike.getOperand(1); 643 SrcReg = MOSrc.getReg(); 644 SrcSubReg = MOSrc.getSubReg(); 645 // What we track are the alternative sources of the definition. 646 const MachineOperand &MODef = CopyLike.getOperand(0); 647 TrackReg = MODef.getReg(); 648 TrackSubReg = MODef.getSubReg(); 649 return true; 650 } 651 652 /// \brief Rewrite the current source with \p NewReg and \p NewSubReg 653 /// if possible. 654 /// \return True if the rewritting was possible, false otherwise. 655 virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { 656 if (!CopyLike.isCopy() || CurrentSrcIdx != 1) 657 return false; 658 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); 659 MOSrc.setReg(NewReg); 660 MOSrc.setSubReg(NewSubReg); 661 return true; 662 } 663 }; 664 665 /// \brief Specialized rewriter for INSERT_SUBREG instruction. 666 class InsertSubregRewriter : public CopyRewriter { 667 public: 668 InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) { 669 assert(MI.isInsertSubreg() && "Invalid instruction"); 670 } 671 672 /// \brief See CopyRewriter::getNextRewritableSource. 673 /// Here CopyLike has the following form: 674 /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. 675 /// Src1 has the same register class has dst, hence, there is 676 /// nothing to rewrite. 677 /// Src2.src2SubIdx, may not be register coalescer friendly. 678 /// Therefore, the first call to this method returns: 679 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 680 /// (TrackReg, TrackSubReg) = (dst, subIdx). 681 /// 682 /// Subsequence calls will return false. 683 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 684 unsigned &TrackReg, 685 unsigned &TrackSubReg) override { 686 // If we already get the only source we can rewrite, return false. 687 if (CurrentSrcIdx == 2) 688 return false; 689 // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. 690 CurrentSrcIdx = 2; 691 const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); 692 SrcReg = MOInsertedReg.getReg(); 693 SrcSubReg = MOInsertedReg.getSubReg(); 694 const MachineOperand &MODef = CopyLike.getOperand(0); 695 696 // We want to track something that is compatible with the 697 // partial definition. 698 TrackReg = MODef.getReg(); 699 if (MODef.getSubReg()) 700 // Bails if we have to compose sub-register indices. 701 return false; 702 TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); 703 return true; 704 } 705 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 706 if (CurrentSrcIdx != 2) 707 return false; 708 // We are rewriting the inserted reg. 709 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 710 MO.setReg(NewReg); 711 MO.setSubReg(NewSubReg); 712 return true; 713 } 714 }; 715 716 /// \brief Specialized rewriter for EXTRACT_SUBREG instruction. 717 class ExtractSubregRewriter : public CopyRewriter { 718 const TargetInstrInfo &TII; 719 720 public: 721 ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) 722 : CopyRewriter(MI), TII(TII) { 723 assert(MI.isExtractSubreg() && "Invalid instruction"); 724 } 725 726 /// \brief See CopyRewriter::getNextRewritableSource. 727 /// Here CopyLike has the following form: 728 /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. 729 /// There is only one rewritable source: Src.subIdx, 730 /// which defines dst.dstSubIdx. 731 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 732 unsigned &TrackReg, 733 unsigned &TrackSubReg) override { 734 // If we already get the only source we can rewrite, return false. 735 if (CurrentSrcIdx == 1) 736 return false; 737 // We are looking at v1 = EXTRACT_SUBREG v0, sub0. 738 CurrentSrcIdx = 1; 739 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); 740 SrcReg = MOExtractedReg.getReg(); 741 // If we have to compose sub-register indices, bails out. 742 if (MOExtractedReg.getSubReg()) 743 return false; 744 745 SrcSubReg = CopyLike.getOperand(2).getImm(); 746 747 // We want to track something that is compatible with the definition. 748 const MachineOperand &MODef = CopyLike.getOperand(0); 749 TrackReg = MODef.getReg(); 750 TrackSubReg = MODef.getSubReg(); 751 return true; 752 } 753 754 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 755 // The only source we can rewrite is the input register. 756 if (CurrentSrcIdx != 1) 757 return false; 758 759 CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); 760 761 // If we find a source that does not require to extract something, 762 // rewrite the operation with a copy. 763 if (!NewSubReg) { 764 // Move the current index to an invalid position. 765 // We do not want another call to this method to be able 766 // to do any change. 767 CurrentSrcIdx = -1; 768 // Rewrite the operation as a COPY. 769 // Get rid of the sub-register index. 770 CopyLike.RemoveOperand(2); 771 // Morph the operation into a COPY. 772 CopyLike.setDesc(TII.get(TargetOpcode::COPY)); 773 return true; 774 } 775 CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); 776 return true; 777 } 778 }; 779 780 /// \brief Specialized rewriter for REG_SEQUENCE instruction. 781 class RegSequenceRewriter : public CopyRewriter { 782 public: 783 RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) { 784 assert(MI.isRegSequence() && "Invalid instruction"); 785 } 786 787 /// \brief See CopyRewriter::getNextRewritableSource. 788 /// Here CopyLike has the following form: 789 /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. 790 /// Each call will return a different source, walking all the available 791 /// source. 792 /// 793 /// The first call returns: 794 /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). 795 /// (TrackReg, TrackSubReg) = (dst, subIdx1). 796 /// 797 /// The second call returns: 798 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 799 /// (TrackReg, TrackSubReg) = (dst, subIdx2). 800 /// 801 /// And so on, until all the sources have been traversed, then 802 /// it returns false. 803 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 804 unsigned &TrackReg, 805 unsigned &TrackSubReg) override { 806 // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. 807 808 // If this is the first call, move to the first argument. 809 if (CurrentSrcIdx == 0) { 810 CurrentSrcIdx = 1; 811 } else { 812 // Otherwise, move to the next argument and check that it is valid. 813 CurrentSrcIdx += 2; 814 if (CurrentSrcIdx >= CopyLike.getNumOperands()) 815 return false; 816 } 817 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); 818 SrcReg = MOInsertedReg.getReg(); 819 // If we have to compose sub-register indices, bails out. 820 if ((SrcSubReg = MOInsertedReg.getSubReg())) 821 return false; 822 823 // We want to track something that is compatible with the related 824 // partial definition. 825 TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); 826 827 const MachineOperand &MODef = CopyLike.getOperand(0); 828 TrackReg = MODef.getReg(); 829 // If we have to compose sub-registers, bails. 830 return MODef.getSubReg() == 0; 831 } 832 833 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 834 // We cannot rewrite out of bound operands. 835 // Moreover, rewritable sources are at odd positions. 836 if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) 837 return false; 838 839 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 840 MO.setReg(NewReg); 841 MO.setSubReg(NewSubReg); 842 return true; 843 } 844 }; 845 } // End namespace. 846 847 /// \brief Get the appropriated CopyRewriter for \p MI. 848 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr 849 /// if no rewriter works for \p MI. 850 static CopyRewriter *getCopyRewriter(MachineInstr &MI, 851 const TargetInstrInfo &TII) { 852 switch (MI.getOpcode()) { 853 default: 854 return nullptr; 855 case TargetOpcode::COPY: 856 return new CopyRewriter(MI); 857 case TargetOpcode::INSERT_SUBREG: 858 return new InsertSubregRewriter(MI); 859 case TargetOpcode::EXTRACT_SUBREG: 860 return new ExtractSubregRewriter(MI, TII); 861 case TargetOpcode::REG_SEQUENCE: 862 return new RegSequenceRewriter(MI); 863 } 864 llvm_unreachable(nullptr); 865 } 866 867 /// \brief Optimize generic copy instructions to avoid cross 868 /// register bank copy. The optimization looks through a chain of 869 /// copies and tries to find a source that has a compatible register 870 /// class. 871 /// Two register classes are considered to be compatible if they share 872 /// the same register bank. 873 /// New copies issued by this optimization are register allocator 874 /// friendly. This optimization does not remove any copy as it may 875 /// overconstraint the register allocator, but replaces some operands 876 /// when possible. 877 /// \pre isCoalescableCopy(*MI) is true. 878 /// \return True, when \p MI has been rewritten. False otherwise. 879 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { 880 assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); 881 assert(MI->getDesc().getNumDefs() == 1 && 882 "Coalescer can understand multiple defs?!"); 883 const MachineOperand &MODef = MI->getOperand(0); 884 // Do not rewrite physical definitions. 885 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 886 return false; 887 888 bool Changed = false; 889 // Get the right rewriter for the current copy. 890 std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII)); 891 // If none exists, bails out. 892 if (!CpyRewriter) 893 return false; 894 // Rewrite each rewritable source. 895 unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; 896 while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, 897 TrackSubReg)) { 898 unsigned NewSrc = TrackReg; 899 unsigned NewSubReg = TrackSubReg; 900 // Try to find a more suitable source. 901 // If we failed to do so, or get the actual source, 902 // move to the next source. 903 if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) 904 continue; 905 // Rewrite source. 906 if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { 907 // We may have extended the live-range of NewSrc, account for that. 908 MRI->clearKillFlags(NewSrc); 909 Changed = true; 910 } 911 } 912 // TODO: We could have a clean-up method to tidy the instruction. 913 // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 914 // => v0 = COPY v1 915 // Currently we haven't seen motivating example for that and we 916 // want to avoid untested code. 917 NumRewrittenCopies += Changed == true; 918 return Changed; 919 } 920 921 /// \brief Optimize copy-like instructions to create 922 /// register coalescer friendly instruction. 923 /// The optimization tries to kill-off the \p MI by looking 924 /// through a chain of copies to find a source that has a compatible 925 /// register class. 926 /// If such a source is found, it replace \p MI by a generic COPY 927 /// operation. 928 /// \pre isUncoalescableCopy(*MI) is true. 929 /// \return True, when \p MI has been optimized. In that case, \p MI has 930 /// been removed from its parent. 931 /// All COPY instructions created, are inserted in \p LocalMIs. 932 bool PeepholeOptimizer::optimizeUncoalescableCopy( 933 MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { 934 assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); 935 936 // Check if we can rewrite all the values defined by this instruction. 937 SmallVector< 938 std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>, 939 4> RewritePairs; 940 for (const MachineOperand &MODef : MI->defs()) { 941 if (MODef.isDead()) 942 // We can ignore those. 943 continue; 944 945 // If a physical register is here, this is probably for a good reason. 946 // Do not rewrite that. 947 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 948 return false; 949 950 // If we do not know how to rewrite this definition, there is no point 951 // in trying to kill this instruction. 952 TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg()); 953 TargetInstrInfo::RegSubRegPair Src = Def; 954 if (!findNextSource(Src.Reg, Src.SubReg)) 955 return false; 956 RewritePairs.push_back(std::make_pair(Def, Src)); 957 } 958 // The change is possible for all defs, do it. 959 for (const auto &PairDefSrc : RewritePairs) { 960 const auto &Def = PairDefSrc.first; 961 const auto &Src = PairDefSrc.second; 962 // Rewrite the "copy" in a way the register coalescer understands. 963 assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && 964 "We do not rewrite physical registers"); 965 const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); 966 unsigned NewVR = MRI->createVirtualRegister(DefRC); 967 MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 968 TII->get(TargetOpcode::COPY), 969 NewVR).addReg(Src.Reg, 0, Src.SubReg); 970 NewCopy->getOperand(0).setSubReg(Def.SubReg); 971 if (Def.SubReg) 972 NewCopy->getOperand(0).setIsUndef(); 973 LocalMIs.insert(NewCopy); 974 MRI->replaceRegWith(Def.Reg, NewVR); 975 MRI->clearKillFlags(NewVR); 976 // We extended the lifetime of Src. 977 // Clear the kill flags to account for that. 978 MRI->clearKillFlags(Src.Reg); 979 } 980 // MI is now dead. 981 MI->eraseFromParent(); 982 ++NumUncoalescableCopies; 983 return true; 984 } 985 986 /// isLoadFoldable - Check whether MI is a candidate for folding into a later 987 /// instruction. We only fold loads to virtual registers and the virtual 988 /// register defined has a single use. 989 bool PeepholeOptimizer::isLoadFoldable( 990 MachineInstr *MI, 991 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { 992 if (!MI->canFoldAsLoad() || !MI->mayLoad()) 993 return false; 994 const MCInstrDesc &MCID = MI->getDesc(); 995 if (MCID.getNumDefs() != 1) 996 return false; 997 998 unsigned Reg = MI->getOperand(0).getReg(); 999 // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting 1000 // loads. It should be checked when processing uses of the load, since 1001 // uses can be removed during peephole. 1002 if (!MI->getOperand(0).getSubReg() && 1003 TargetRegisterInfo::isVirtualRegister(Reg) && 1004 MRI->hasOneNonDBGUse(Reg)) { 1005 FoldAsLoadDefCandidates.insert(Reg); 1006 return true; 1007 } 1008 return false; 1009 } 1010 1011 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, 1012 SmallSet<unsigned, 4> &ImmDefRegs, 1013 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 1014 const MCInstrDesc &MCID = MI->getDesc(); 1015 if (!MI->isMoveImmediate()) 1016 return false; 1017 if (MCID.getNumDefs() != 1) 1018 return false; 1019 unsigned Reg = MI->getOperand(0).getReg(); 1020 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 1021 ImmDefMIs.insert(std::make_pair(Reg, MI)); 1022 ImmDefRegs.insert(Reg); 1023 return true; 1024 } 1025 1026 return false; 1027 } 1028 1029 /// foldImmediate - Try folding register operands that are defined by move 1030 /// immediate instructions, i.e. a trivial constant folding optimization, if 1031 /// and only if the def and use are in the same BB. 1032 bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 1033 SmallSet<unsigned, 4> &ImmDefRegs, 1034 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 1035 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 1036 MachineOperand &MO = MI->getOperand(i); 1037 if (!MO.isReg() || MO.isDef()) 1038 continue; 1039 unsigned Reg = MO.getReg(); 1040 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1041 continue; 1042 if (ImmDefRegs.count(Reg) == 0) 1043 continue; 1044 DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); 1045 assert(II != ImmDefMIs.end()); 1046 if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { 1047 ++NumImmFold; 1048 return true; 1049 } 1050 } 1051 return false; 1052 } 1053 1054 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { 1055 if (skipOptnoneFunction(*MF.getFunction())) 1056 return false; 1057 1058 DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); 1059 DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); 1060 1061 if (DisablePeephole) 1062 return false; 1063 1064 TII = MF.getSubtarget().getInstrInfo(); 1065 TRI = MF.getSubtarget().getRegisterInfo(); 1066 MRI = &MF.getRegInfo(); 1067 DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr; 1068 1069 bool Changed = false; 1070 1071 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 1072 MachineBasicBlock *MBB = &*I; 1073 1074 bool SeenMoveImm = false; 1075 SmallPtrSet<MachineInstr*, 16> LocalMIs; 1076 SmallSet<unsigned, 4> ImmDefRegs; 1077 DenseMap<unsigned, MachineInstr*> ImmDefMIs; 1078 SmallSet<unsigned, 16> FoldAsLoadDefCandidates; 1079 1080 for (MachineBasicBlock::iterator 1081 MII = I->begin(), MIE = I->end(); MII != MIE; ) { 1082 MachineInstr *MI = &*MII; 1083 // We may be erasing MI below, increment MII now. 1084 ++MII; 1085 LocalMIs.insert(MI); 1086 1087 // Skip debug values. They should not affect this peephole optimization. 1088 if (MI->isDebugValue()) 1089 continue; 1090 1091 // If there exists an instruction which belongs to the following 1092 // categories, we will discard the load candidates. 1093 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || 1094 MI->isKill() || MI->isInlineAsm() || 1095 MI->hasUnmodeledSideEffects()) { 1096 FoldAsLoadDefCandidates.clear(); 1097 continue; 1098 } 1099 if (MI->mayStore() || MI->isCall()) 1100 FoldAsLoadDefCandidates.clear(); 1101 1102 if ((isUncoalescableCopy(*MI) && 1103 optimizeUncoalescableCopy(MI, LocalMIs)) || 1104 (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || 1105 (MI->isSelect() && optimizeSelect(MI))) { 1106 // MI is deleted. 1107 LocalMIs.erase(MI); 1108 Changed = true; 1109 continue; 1110 } 1111 1112 if (MI->isConditionalBranch() && optimizeCondBranch(MI)) { 1113 Changed = true; 1114 continue; 1115 } 1116 1117 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { 1118 // MI is just rewritten. 1119 Changed = true; 1120 continue; 1121 } 1122 1123 if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { 1124 SeenMoveImm = true; 1125 } else { 1126 Changed |= optimizeExtInstr(MI, MBB, LocalMIs); 1127 // optimizeExtInstr might have created new instructions after MI 1128 // and before the already incremented MII. Adjust MII so that the 1129 // next iteration sees the new instructions. 1130 MII = MI; 1131 ++MII; 1132 if (SeenMoveImm) 1133 Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); 1134 } 1135 1136 // Check whether MI is a load candidate for folding into a later 1137 // instruction. If MI is not a candidate, check whether we can fold an 1138 // earlier load into MI. 1139 if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && 1140 !FoldAsLoadDefCandidates.empty()) { 1141 const MCInstrDesc &MIDesc = MI->getDesc(); 1142 for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands(); 1143 ++i) { 1144 const MachineOperand &MOp = MI->getOperand(i); 1145 if (!MOp.isReg()) 1146 continue; 1147 unsigned FoldAsLoadDefReg = MOp.getReg(); 1148 if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) { 1149 // We need to fold load after optimizeCmpInstr, since 1150 // optimizeCmpInstr can enable folding by converting SUB to CMP. 1151 // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and 1152 // we need it for markUsesInDebugValueAsUndef(). 1153 unsigned FoldedReg = FoldAsLoadDefReg; 1154 MachineInstr *DefMI = nullptr; 1155 MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, 1156 FoldAsLoadDefReg, 1157 DefMI); 1158 if (FoldMI) { 1159 // Update LocalMIs since we replaced MI with FoldMI and deleted 1160 // DefMI. 1161 DEBUG(dbgs() << "Replacing: " << *MI); 1162 DEBUG(dbgs() << " With: " << *FoldMI); 1163 LocalMIs.erase(MI); 1164 LocalMIs.erase(DefMI); 1165 LocalMIs.insert(FoldMI); 1166 MI->eraseFromParent(); 1167 DefMI->eraseFromParent(); 1168 MRI->markUsesInDebugValueAsUndef(FoldedReg); 1169 FoldAsLoadDefCandidates.erase(FoldedReg); 1170 ++NumLoadFold; 1171 // MI is replaced with FoldMI. 1172 Changed = true; 1173 break; 1174 } 1175 } 1176 } 1177 } 1178 } 1179 } 1180 1181 return Changed; 1182 } 1183 1184 bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, 1185 unsigned &SrcSubReg) { 1186 assert(Def->isCopy() && "Invalid definition"); 1187 // Copy instruction are supposed to be: Def = Src. 1188 // If someone breaks this assumption, bad things will happen everywhere. 1189 assert(Def->getNumOperands() == 2 && "Invalid number of operands"); 1190 1191 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 1192 // If we look for a different subreg, it means we want a subreg of src. 1193 // Bails as we do not support composing subreg yet. 1194 return false; 1195 // Otherwise, we want the whole source. 1196 const MachineOperand &Src = Def->getOperand(1); 1197 SrcReg = Src.getReg(); 1198 SrcSubReg = Src.getSubReg(); 1199 return true; 1200 } 1201 1202 bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, 1203 unsigned &SrcSubReg) { 1204 assert(Def->isBitcast() && "Invalid definition"); 1205 1206 // Bail if there are effects that a plain copy will not expose. 1207 if (Def->hasUnmodeledSideEffects()) 1208 return false; 1209 1210 // Bitcasts with more than one def are not supported. 1211 if (Def->getDesc().getNumDefs() != 1) 1212 return false; 1213 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 1214 // If we look for a different subreg, it means we want a subreg of the src. 1215 // Bails as we do not support composing subreg yet. 1216 return false; 1217 1218 unsigned SrcIdx = Def->getNumOperands(); 1219 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; 1220 ++OpIdx) { 1221 const MachineOperand &MO = Def->getOperand(OpIdx); 1222 if (!MO.isReg() || !MO.getReg()) 1223 continue; 1224 assert(!MO.isDef() && "We should have skipped all the definitions by now"); 1225 if (SrcIdx != EndOpIdx) 1226 // Multiple sources? 1227 return false; 1228 SrcIdx = OpIdx; 1229 } 1230 const MachineOperand &Src = Def->getOperand(SrcIdx); 1231 SrcReg = Src.getReg(); 1232 SrcSubReg = Src.getSubReg(); 1233 return true; 1234 } 1235 1236 bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, 1237 unsigned &SrcSubReg) { 1238 assert((Def->isRegSequence() || Def->isRegSequenceLike()) && 1239 "Invalid definition"); 1240 1241 if (Def->getOperand(DefIdx).getSubReg()) 1242 // If we are composing subreg, bails out. 1243 // The case we are checking is Def.<subreg> = REG_SEQUENCE. 1244 // This should almost never happen as the SSA property is tracked at 1245 // the register level (as opposed to the subreg level). 1246 // I.e., 1247 // Def.sub0 = 1248 // Def.sub1 = 1249 // is a valid SSA representation for Def.sub0 and Def.sub1, but not for 1250 // Def. Thus, it must not be generated. 1251 // However, some code could theoretically generates a single 1252 // Def.sub0 (i.e, not defining the other subregs) and we would 1253 // have this case. 1254 // If we can ascertain (or force) that this never happens, we could 1255 // turn that into an assertion. 1256 return false; 1257 1258 if (!TII) 1259 // We could handle the REG_SEQUENCE here, but we do not want to 1260 // duplicate the code from the generic TII. 1261 return false; 1262 1263 SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs; 1264 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) 1265 return false; 1266 1267 // We are looking at: 1268 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... 1269 // Check if one of the operand defines the subreg we are interested in. 1270 for (auto &RegSeqInput : RegSeqInputRegs) { 1271 if (RegSeqInput.SubIdx == DefSubReg) { 1272 if (RegSeqInput.SubReg) 1273 // Bails if we have to compose sub registers. 1274 return false; 1275 1276 SrcReg = RegSeqInput.Reg; 1277 SrcSubReg = RegSeqInput.SubReg; 1278 return true; 1279 } 1280 } 1281 1282 // If the subreg we are tracking is super-defined by another subreg, 1283 // we could follow this value. However, this would require to compose 1284 // the subreg and we do not do that for now. 1285 return false; 1286 } 1287 1288 bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, 1289 unsigned &SrcSubReg) { 1290 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && 1291 "Invalid definition"); 1292 1293 if (Def->getOperand(DefIdx).getSubReg()) 1294 // If we are composing subreg, bails out. 1295 // Same remark as getNextSourceFromRegSequence. 1296 // I.e., this may be turned into an assert. 1297 return false; 1298 1299 if (!TII) 1300 // We could handle the REG_SEQUENCE here, but we do not want to 1301 // duplicate the code from the generic TII. 1302 return false; 1303 1304 TargetInstrInfo::RegSubRegPair BaseReg; 1305 TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; 1306 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) 1307 return false; 1308 1309 // We are looking at: 1310 // Def = INSERT_SUBREG v0, v1, sub1 1311 // There are two cases: 1312 // 1. DefSubReg == sub1, get v1. 1313 // 2. DefSubReg != sub1, the value may be available through v0. 1314 1315 // #1 Check if the inserted register matches the required sub index. 1316 if (InsertedReg.SubIdx == DefSubReg) { 1317 SrcReg = InsertedReg.Reg; 1318 SrcSubReg = InsertedReg.SubReg; 1319 return true; 1320 } 1321 // #2 Otherwise, if the sub register we are looking for is not partial 1322 // defined by the inserted element, we can look through the main 1323 // register (v0). 1324 const MachineOperand &MODef = Def->getOperand(DefIdx); 1325 // If the result register (Def) and the base register (v0) do not 1326 // have the same register class or if we have to compose 1327 // subregisters, bails out. 1328 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || 1329 BaseReg.SubReg) 1330 return false; 1331 1332 // Get the TRI and check if the inserted sub-register overlaps with the 1333 // sub-register we are tracking. 1334 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); 1335 if (!TRI || 1336 (TRI->getSubRegIndexLaneMask(DefSubReg) & 1337 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) 1338 return false; 1339 // At this point, the value is available in v0 via the same subreg 1340 // we used for Def. 1341 SrcReg = BaseReg.Reg; 1342 SrcSubReg = DefSubReg; 1343 return true; 1344 } 1345 1346 bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg, 1347 unsigned &SrcSubReg) { 1348 assert((Def->isExtractSubreg() || 1349 Def->isExtractSubregLike()) && "Invalid definition"); 1350 // We are looking at: 1351 // Def = EXTRACT_SUBREG v0, sub0 1352 1353 // Bails if we have to compose sub registers. 1354 // Indeed, if DefSubReg != 0, we would have to compose it with sub0. 1355 if (DefSubReg) 1356 return false; 1357 1358 if (!TII) 1359 // We could handle the EXTRACT_SUBREG here, but we do not want to 1360 // duplicate the code from the generic TII. 1361 return false; 1362 1363 TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; 1364 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) 1365 return false; 1366 1367 // Bails if we have to compose sub registers. 1368 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. 1369 if (ExtractSubregInputReg.SubReg) 1370 return false; 1371 // Otherwise, the value is available in the v0.sub0. 1372 SrcReg = ExtractSubregInputReg.Reg; 1373 SrcSubReg = ExtractSubregInputReg.SubIdx; 1374 return true; 1375 } 1376 1377 bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg, 1378 unsigned &SrcSubReg) { 1379 assert(Def->isSubregToReg() && "Invalid definition"); 1380 // We are looking at: 1381 // Def = SUBREG_TO_REG Imm, v0, sub0 1382 1383 // Bails if we have to compose sub registers. 1384 // If DefSubReg != sub0, we would have to check that all the bits 1385 // we track are included in sub0 and if yes, we would have to 1386 // determine the right subreg in v0. 1387 if (DefSubReg != Def->getOperand(3).getImm()) 1388 return false; 1389 // Bails if we have to compose sub registers. 1390 // Likewise, if v0.subreg != 0, we would have to compose it with sub0. 1391 if (Def->getOperand(2).getSubReg()) 1392 return false; 1393 1394 SrcReg = Def->getOperand(2).getReg(); 1395 SrcSubReg = Def->getOperand(3).getImm(); 1396 return true; 1397 } 1398 1399 bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) { 1400 assert(Def && "This method needs a valid definition"); 1401 1402 assert( 1403 (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && 1404 Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); 1405 if (Def->isCopy()) 1406 return getNextSourceFromCopy(SrcReg, SrcSubReg); 1407 if (Def->isBitcast()) 1408 return getNextSourceFromBitcast(SrcReg, SrcSubReg); 1409 // All the remaining cases involve "complex" instructions. 1410 // Bails if we did not ask for the advanced tracking. 1411 if (!UseAdvancedTracking) 1412 return false; 1413 if (Def->isRegSequence() || Def->isRegSequenceLike()) 1414 return getNextSourceFromRegSequence(SrcReg, SrcSubReg); 1415 if (Def->isInsertSubreg() || Def->isInsertSubregLike()) 1416 return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg); 1417 if (Def->isExtractSubreg() || Def->isExtractSubregLike()) 1418 return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg); 1419 if (Def->isSubregToReg()) 1420 return getNextSourceFromSubregToReg(SrcReg, SrcSubReg); 1421 return false; 1422 } 1423 1424 const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg, 1425 unsigned &SrcSubReg) { 1426 // If we reach a point where we cannot move up in the use-def chain, 1427 // there is nothing we can get. 1428 if (!Def) 1429 return nullptr; 1430 1431 const MachineInstr *PrevDef = nullptr; 1432 // Try to find the next source. 1433 if (getNextSourceImpl(SrcReg, SrcSubReg)) { 1434 // Update definition, definition index, and subregister for the 1435 // next call of getNextSource. 1436 // Update the current register. 1437 Reg = SrcReg; 1438 // Update the return value before moving up in the use-def chain. 1439 PrevDef = Def; 1440 // If we can still move up in the use-def chain, move to the next 1441 // defintion. 1442 if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { 1443 Def = MRI.getVRegDef(Reg); 1444 DefIdx = MRI.def_begin(Reg).getOperandNo(); 1445 DefSubReg = SrcSubReg; 1446 return PrevDef; 1447 } 1448 } 1449 // If we end up here, this means we will not be able to find another source 1450 // for the next iteration. 1451 // Make sure any new call to getNextSource bails out early by cutting the 1452 // use-def chain. 1453 Def = nullptr; 1454 return PrevDef; 1455 } 1456