//===-- PeepholeOptimizer.cpp - Peephole Optimizations -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Perform peephole optimizations on the machine code:
//
// - Optimize Extensions
//
//     Optimization of sign / zero extension instructions. It may be extended
//     to handle other instructions with similar properties.
//
//     On some targets, some instructions, e.g. X86 sign / zero extension, may
//     leave the source value in the lower part of the result. This
//     optimization will replace some uses of the pre-extension value with uses
//     of the sub-register of the result.
//
// - Optimize Comparisons
//
//     Optimization of comparison instructions. For instance, in this code:
//
//       sub r1, 1
//       cmp r1, 0
//       bz  L1
//
//     If the "sub" instruction already sets (or could be modified to set) the
//     same flag that the "cmp" instruction sets and that "bz" uses, then we
//     can eliminate the "cmp" instruction.
//
//     Another instance, in this code:
//
//       sub r1, r3 | sub r1, imm
//       cmp r3, r1 or cmp r1, r3 | cmp r1, imm
//       bge L1
//
//     If the branch instruction can use a flag from "sub", then we can replace
//     "sub" with "subs" and eliminate the "cmp" instruction.
//
// - Optimize Loads:
//
//     Loads that can be folded into a later instruction. A load is foldable
//     if it loads into a virtual register and that virtual register has a
//     single use.
//
// - Optimize Copies and Bitcast (more generally, target specific copies):
//
//     Rewrite copies and bitcasts to avoid cross register bank copies
//     when possible.
//     E.g., consider the following example, where capital and lower case
//     letters denote different register files:
//       b = copy A <-- cross-bank copy
//       C = copy b <-- cross-bank copy
//     =>
//       b = copy A <-- cross-bank copy
//       C = copy A <-- same-bank copy
//
//     E.g., for bitcast:
//       b = bitcast A <-- cross-bank copy
//       C = bitcast b <-- cross-bank copy
//     =>
//       b = bitcast A <-- cross-bank copy
//       C = copy A    <-- same-bank copy
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <utility>
using namespace llvm;

#define DEBUG_TYPE "peephole-opt"

// Optimize Extensions
static cl::opt<bool>
Aggressive("aggressive-ext-opt", cl::Hidden,
           cl::desc("Aggressive extension optimization"));

static cl::opt<bool>
DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
                cl::desc("Disable the peephole optimizer"));

static cl::opt<bool>
DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
                  cl::desc("Disable advanced copy optimization"));

STATISTIC(NumReuse, "Number of extension results reused");
STATISTIC(NumCmps, "Number of compares eliminated");
STATISTIC(NumImmFold, "Number of move immediate folded");
STATISTIC(NumLoadFold, "Number of loads folded");
STATISTIC(NumSelects, "Number of selects optimized");
STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
STATISTIC(NumRewrittenCopies, "Number of copies rewritten");

namespace {
  class PeepholeOptimizer : public MachineFunctionPass {
    const TargetInstrInfo *TII;
    const TargetRegisterInfo *TRI;
    MachineRegisterInfo *MRI;
    MachineDominatorTree *DT;  // Machine dominator tree

  public:
    static char ID; // Pass identification
    PeepholeOptimizer() : MachineFunctionPass(ID) {
      initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
    }

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.setPreservesCFG();
      MachineFunctionPass::getAnalysisUsage(AU);
      if (Aggressive) {
        AU.addRequired<MachineDominatorTree>();
        AU.addPreserved<MachineDominatorTree>();
      }
    }

  private:
    bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
    bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                          SmallPtrSetImpl<MachineInstr*> &LocalMIs);
    bool optimizeSelect(MachineInstr *MI,
                        SmallPtrSetImpl<MachineInstr *> &LocalMIs);
    bool optimizeCondBranch(MachineInstr *MI);
    bool optimizeCopyOrBitcast(MachineInstr *MI);
    bool optimizeCoalescableCopy(MachineInstr *MI);
    bool optimizeUncoalescableCopy(MachineInstr *MI,
                                   SmallPtrSetImpl<MachineInstr *> &LocalMIs);
    bool findNextSource(unsigned &Reg, unsigned &SubReg);
    bool isMoveImmediate(MachineInstr *MI,
                         SmallSet<unsigned, 4> &ImmDefRegs,
                         DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                       SmallSet<unsigned, 4> &ImmDefRegs,
                       DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    bool isLoadFoldable(MachineInstr *MI,
                        SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);

    /// \brief Check whether \p MI is understood by the register coalescer
    /// but may require some rewriting.
    bool isCoalescableCopy(const MachineInstr &MI) {
      // SubregToRegs are not interesting, because they are already register
      // coalescer friendly.
      return MI.isCopy() || (!DisableAdvCopyOpt &&
                             (MI.isRegSequence() || MI.isInsertSubreg() ||
                              MI.isExtractSubreg()));
    }

    /// \brief Check whether \p MI is a copy-like instruction that is
    /// not recognized by the register coalescer.
    bool isUncoalescableCopy(const MachineInstr &MI) {
      return MI.isBitcast() ||
             (!DisableAdvCopyOpt &&
              (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
               MI.isExtractSubregLike()));
    }
  };

  /// \brief Helper class to track the possible sources of a value defined by
  /// a (chain of) copy related instructions.
  /// Given a definition (instruction and definition index), this class
  /// follows the use-def chain to find successive suitable sources.
  /// The given source can be used to rewrite the definition into
  /// def = COPY src.
  ///
  /// For instance, let us consider the following snippet:
  /// v0 =
  /// v2 = INSERT_SUBREG v1, v0, sub0
  /// def = COPY v2.sub0
  ///
  /// Using a ValueTracker for def = COPY v2.sub0 will give the following
  /// suitable sources:
  /// v2.sub0 and v0.
  /// Then, def can be rewritten into def = COPY v0.
  class ValueTracker {
  private:
    /// The current point into the use-def chain.
    const MachineInstr *Def;
    /// The index of the definition in Def.
    unsigned DefIdx;
    /// The sub register index of the definition.
    unsigned DefSubReg;
    /// The register where the value can be found.
    unsigned Reg;
    /// Specify whether or not the value tracking looks through
    /// complex instructions. When this is false, the value tracker
    /// bails on everything that is not a copy or a bitcast.
    ///
    /// Note: This could have been implemented as a specialized version of
    /// the ValueTracker class but that would have complicated the code of
    /// the users of this class.
    bool UseAdvancedTracking;
    /// MachineRegisterInfo used to perform tracking.
    const MachineRegisterInfo &MRI;
    /// Optional TargetInstrInfo used to perform some complex
    /// tracking.
    const TargetInstrInfo *TII;

    /// \brief Dispatcher to the right underlying implementation of
    /// getNextSource.
    bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg);
    /// \brief Specialized version of getNextSource for Copy instructions.
    bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg);
    /// \brief Specialized version of getNextSource for Bitcast instructions.
    bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg);
    /// \brief Specialized version of getNextSource for RegSequence
    /// instructions.
    bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg);
    /// \brief Specialized version of getNextSource for InsertSubreg
    /// instructions.
    bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
    /// \brief Specialized version of getNextSource for ExtractSubreg
    /// instructions.
    bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
    /// \brief Specialized version of getNextSource for SubregToReg
    /// instructions.
    bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg);

  public:
    /// \brief Create a ValueTracker instance for the value defined by \p Reg.
    /// \p DefSubReg represents the sub register index the value tracker will
    /// track. It does not need to match the sub register index used in the
    /// definition of \p Reg.
    /// \p UseAdvancedTracking specifies whether or not the value tracker looks
    /// through complex instructions. By default (false), it handles only copy
    /// and bitcast instructions.
    /// If \p Reg is a physical register, a value tracker constructed with
    /// this constructor will not find any alternative source.
    /// Indeed, when \p Reg is a physical register, this constructor does not
    /// know which definition of \p Reg it should track.
    /// Use the next constructor to track a physical register.
    ValueTracker(unsigned Reg, unsigned DefSubReg,
                 const MachineRegisterInfo &MRI,
                 bool UseAdvancedTracking = false,
                 const TargetInstrInfo *TII = nullptr)
        : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg),
          UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
      if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
        Def = MRI.getVRegDef(Reg);
        DefIdx = MRI.def_begin(Reg).getOperandNo();
      }
    }

    /// \brief Create a ValueTracker instance for the value defined by
    /// the pair \p MI, \p DefIdx.
    /// Unlike the other constructor, the value tracker produced by this one
    /// may be able to find a new source when the definition is a physical
    /// register.
    /// This could be useful to rewrite target specific instructions into
    /// generic copy instructions.
    ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
                 const MachineRegisterInfo &MRI,
                 bool UseAdvancedTracking = false,
                 const TargetInstrInfo *TII = nullptr)
        : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
          UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
      assert(DefIdx < Def->getDesc().getNumDefs() &&
             Def->getOperand(DefIdx).isReg() && "Invalid definition");
      Reg = Def->getOperand(DefIdx).getReg();
    }

    /// \brief Following the use-def chain, get the next available source
    /// for the tracked value.
    /// When the returned value is not nullptr, \p SrcReg gives the register
    /// that contains the tracked value.
    /// \note The sub register index returned in \p SrcSubReg must be used
    /// on \p SrcReg to access the actual value.
    /// \return Unless the returned value is nullptr (i.e., no source found),
    /// \p SrcReg gives the register of the next source used in the returned
    /// instruction and \p SrcSubReg the sub-register index to be used on that
    /// source to get the tracked value. When nullptr is returned, no
    /// alternative source has been found.
    const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg);

    /// \brief Get the last register where the initial value can be found.
    /// Initially this is the register of the definition.
    /// Then, after each successful call to getNextSource, this is the
    /// register of the last source.
    unsigned getReg() const { return Reg; }
  };
}

char PeepholeOptimizer::ID = 0;
char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
                "Peephole Optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
                "Peephole Optimizations", false, false)

/// optimizeExtInstr - If the instruction is a copy-like instruction, i.e. it
/// reads a single register and writes a single register and it does not
/// modify the source, and if the source value is preserved as a sub-register
/// of the result, then replace all reachable uses of the source with the
/// subreg of the result.
///
/// Do not generate an EXTRACT that is used only in a debug use, as this
/// changes the code. Since this code does not currently share EXTRACTs, just
/// ignore all debug uses.
bool PeepholeOptimizer::
optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
                 SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
  unsigned SrcReg, DstReg, SubIdx;
  if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    return false;

  if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  if (MRI->hasOneNonDBGUse(SrcReg))
    // No other uses.
    return false;

  // Ensure DstReg can get a register class that actually supports
  // sub-registers. Don't change the class until we commit.
  const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
  DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
  if (!DstRC)
    return false;

  // The ext instr may be operating on a sub-register of SrcReg as well.
  // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
  // register.
  // If UseSrcSubIdx is set, SubIdx also applies to SrcReg, and only uses of
  // SrcReg:SubIdx should be replaced.
  bool UseSrcSubIdx =
      TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;

  // The source has other uses. See if we can replace the other uses with use
  // of the result of the extension.
  SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
  for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
    ReachedBBs.insert(UI.getParent());

  // Uses that are in the same BB as uses of the result of the instruction.
  SmallVector<MachineOperand*, 8> Uses;

  // Uses that the result of the instruction can reach.
  SmallVector<MachineOperand*, 8> ExtendedUses;

  bool ExtendLife = true;
  for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
    MachineInstr *UseMI = UseMO.getParent();
    if (UseMI == MI)
      continue;

    if (UseMI->isPHI()) {
      ExtendLife = false;
      continue;
    }

    // Only accept uses of SrcReg:SubIdx.
    if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
      continue;

    // It's an error to translate this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    //
    // into this:
    //
    //    %reg1025 = <sext> %reg1024
    //     ...
    //    %reg1027 = COPY %reg1025:4
    //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    //
    // The problem here is that SUBREG_TO_REG is there to assert that an
    // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    // the COPY here, it will give us the value after the <sext>, not the
    // original value of %reg1024 before <sext>.
    if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
      continue;

    MachineBasicBlock *UseMBB = UseMI->getParent();
    if (UseMBB == MBB) {
      // Local uses that come after the extension.
      if (!LocalMIs.count(UseMI))
        Uses.push_back(&UseMO);
    } else if (ReachedBBs.count(UseMBB)) {
      // Non-local uses where the result of the extension is used. Always
      // replace these unless it's a PHI.
      Uses.push_back(&UseMO);
    } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
      // We may want to extend the live range of the extension result in order
      // to replace these uses.
      ExtendedUses.push_back(&UseMO);
    } else {
      // Both will be live out of the def MBB anyway. Don't extend the live
      // range of the extension result.
      ExtendLife = false;
      break;
    }
  }

  if (ExtendLife && !ExtendedUses.empty())
    // Extend the liveness of the extension result.
    Uses.append(ExtendedUses.begin(), ExtendedUses.end());

  // Now replace all uses.
  bool Changed = false;
  if (!Uses.empty()) {
    SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;

    // Look for PHI uses of the extended result; we don't want to extend the
    // liveness of a PHI input. It breaks all kinds of assumptions down
    // stream. A PHI use is expected to be the kill of its source values.
    for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
      if (UI.isPHI())
        PHIBBs.insert(UI.getParent());

    const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
      MachineOperand *UseMO = Uses[i];
      MachineInstr *UseMI = UseMO->getParent();
      MachineBasicBlock *UseMBB = UseMI->getParent();
      if (PHIBBs.count(UseMBB))
        continue;

      // About to add uses of DstReg, clear DstReg's kill flags.
      if (!Changed) {
        MRI->clearKillFlags(DstReg);
        MRI->constrainRegClass(DstReg, DstRC);
      }

      unsigned NewVR = MRI->createVirtualRegister(RC);
      MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
                                   TII->get(TargetOpcode::COPY), NewVR)
        .addReg(DstReg, 0, SubIdx);
      // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
      if (UseSrcSubIdx) {
        Copy->getOperand(0).setSubReg(SubIdx);
        Copy->getOperand(0).setIsUndef();
      }
      UseMO->setReg(NewVR);
      ++NumReuse;
      Changed = true;
    }
  }

  return Changed;
}

/// optimizeCmpInstr - If the instruction is a compare and the previous
/// instruction it's comparing against already sets (or could be modified to
/// set) the same flag as the compare, then we can remove the comparison and
/// use the flag from the previous instruction.
bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
                                         MachineBasicBlock *MBB) {
  // If this instruction is a comparison against zero and isn't comparing a
  // physical register, we can try to optimize it.
  unsigned SrcReg, SrcReg2;
  int CmpMask, CmpValue;
  if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
      TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
      (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
    return false;

  // Attempt to optimize the comparison instruction.
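  // For example, on targets where a preceding arithmetic instruction already
  // sets (or can be made to set) the flags the compare would produce,
  // optimizeCompareInstr may fold the compare into it, as sketched in the
  // file header; the exact transformation is target specific.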
  if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
    ++NumCmps;
    return true;
  }

  return false;
}

/// Optimize a select instruction.
bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI,
                            SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
  unsigned TrueOp = 0;
  unsigned FalseOp = 0;
  bool Optimizable = false;
  SmallVector<MachineOperand, 4> Cond;
  if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
    return false;
  if (!Optimizable)
    return false;
  if (!TII->optimizeSelect(MI, LocalMIs))
    return false;
  MI->eraseFromParent();
  ++NumSelects;
  return true;
}

/// \brief Check if a simpler conditional branch can be
/// generated.
bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
  return TII->optimizeCondBranch(MI);
}

/// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
/// share the same register file.
static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
                                  const TargetRegisterClass *DefRC,
                                  unsigned DefSubReg,
                                  const TargetRegisterClass *SrcRC,
                                  unsigned SrcSubReg) {
  // Same register class.
  if (DefRC == SrcRC)
    return true;

  // Both operands are sub registers. Check if they share a register class.
  unsigned SrcIdx, DefIdx;
  if (SrcSubReg && DefSubReg)
    return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
                                      SrcIdx, DefIdx) != nullptr;
  // At most one of the registers is a sub-register. Make it Src to avoid
  // duplicating the test.
  if (!SrcSubReg) {
    std::swap(DefSubReg, SrcSubReg);
    std::swap(DefRC, SrcRC);
  }

  // One of the registers is a sub-register. Check if we can get a superclass.
  if (SrcSubReg)
    return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
  // Plain copy.
  return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
}

/// \brief Try to find the next source that shares the same register file
/// for the value defined by \p Reg and \p SubReg.
/// When true is returned, \p Reg and \p SubReg are updated with the
/// register number and sub-register index of the new source.
/// \return False if no alternative sources are available. True otherwise.
bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) {
  // Do not try to find a new source for a physical register.
  // So far we do not have any motivating example for doing that.
  // Thus, instead of maintaining untested code, we will revisit that if
  // that changes at some point.
  if (TargetRegisterInfo::isPhysicalRegister(Reg))
    return false;

  const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
  unsigned DefSubReg = SubReg;

  unsigned Src;
  unsigned SrcSubReg;
  bool ShouldRewrite = false;

  // Follow the chain of copies until we reach the top of the use-def chain
  // or find a more suitable source.
  ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII);
  do {
    unsigned CopySrcReg, CopySrcSubReg;
    if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg))
      break;
    Src = CopySrcReg;
    SrcSubReg = CopySrcSubReg;

    // Do not extend the live-ranges of physical registers as they add
    // constraints to the register allocator.
    // Moreover, if we wanted to extend the live-range of a physical register,
    // unlike an SSA virtual register, we would have to check that it is not
    // redefined before the related use.
    if (TargetRegisterInfo::isPhysicalRegister(Src))
      break;

    const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);

    // If this source does not incur a cross register bank copy, use it.
    ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC,
                                          SrcSubReg);
  } while (!ShouldRewrite);

  // If we did not find a more suitable source, there is nothing to optimize.
  if (!ShouldRewrite || Src == Reg)
    return false;

  Reg = Src;
  SubReg = SrcSubReg;
  return true;
}

namespace {
/// \brief Helper class to rewrite the arguments of a copy-like instruction.
class CopyRewriter {
protected:
  /// The copy-like instruction.
  MachineInstr &CopyLike;
  /// The index of the source being rewritten.
  unsigned CurrentSrcIdx;

public:
  CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {}

  virtual ~CopyRewriter() {}

  /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and
  /// the related value that it affects (TrackReg, TrackSubReg).
  /// A source is considered rewritable if its register class and the
  /// register class of the related TrackReg may not be register
  /// coalescer friendly. In other words, given a copy-like instruction,
  /// not all of its arguments may be returned as rewritable sources, since
  /// some of them are already register coalescer friendly.
  ///
  /// Each call of this method moves the current source to the next
  /// rewritable source.
  /// For instance, let CopyLike be the instruction to rewrite.
  /// CopyLike has one definition and one source:
  /// dst.dstSubIdx = CopyLike src.srcSubIdx.
  ///
  /// The first call will give the first rewritable source, i.e.,
  /// the only source this instruction has:
  /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
  /// This source defines the whole definition, i.e.,
  /// (TrackReg, TrackSubReg) = (dst, dstSubIdx).
  ///
  /// The second and subsequent calls will return false, as there is only one
  /// rewritable source.
  ///
  /// \return True if a rewritable source has been found, false otherwise.
  /// The output arguments are valid if and only if true is returned.
  virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
                                       unsigned &TrackReg,
                                       unsigned &TrackSubReg) {
    // If CurrentSrcIdx == 1, this means this function has already been
    // called once. CopyLike has one definition and one argument, thus,
    // there is nothing else to rewrite.
    if (!CopyLike.isCopy() || CurrentSrcIdx == 1)
      return false;
    // This is the first call to getNextRewritableSource.
    // Move the CurrentSrcIdx to remember that we made that call.
    CurrentSrcIdx = 1;
    // The rewritable source is the argument.
    const MachineOperand &MOSrc = CopyLike.getOperand(1);
    SrcReg = MOSrc.getReg();
    SrcSubReg = MOSrc.getSubReg();
    // What we track are the alternative sources of the definition.
    const MachineOperand &MODef = CopyLike.getOperand(0);
    TrackReg = MODef.getReg();
    TrackSubReg = MODef.getSubReg();
    return true;
  }

  /// \brief Rewrite the current source with \p NewReg and \p NewSubReg
  /// if possible.
  /// \return True if the rewriting was possible, false otherwise.
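  /// For illustration (hypothetical operands): if the current source of
  /// 'dst = COPY src.srcSubIdx' is rewritten with (NewReg, NewSubReg) =
  /// (newSrc, newSubIdx), the instruction becomes
  /// 'dst = COPY newSrc.newSubIdx'.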
  virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) {
    if (!CopyLike.isCopy() || CurrentSrcIdx != 1)
      return false;
    MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
    MOSrc.setReg(NewReg);
    MOSrc.setSubReg(NewSubReg);
    return true;
  }
};

/// \brief Specialized rewriter for INSERT_SUBREG instruction.
class InsertSubregRewriter : public CopyRewriter {
public:
  InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) {
    assert(MI.isInsertSubreg() && "Invalid instruction");
  }

  /// \brief See CopyRewriter::getNextRewritableSource.
  /// Here CopyLike has the following form:
  /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
  /// Src1 has the same register class as dst, hence, there is
  /// nothing to rewrite.
  /// Src2.src2SubIdx may not be register coalescer friendly.
  /// Therefore, the first call to this method returns:
  /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
  /// (TrackReg, TrackSubReg) = (dst, subIdx).
  ///
  /// Subsequent calls will return false.
  bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
                               unsigned &TrackReg,
                               unsigned &TrackSubReg) override {
    // If we already got the only source we can rewrite, return false.
    if (CurrentSrcIdx == 2)
      return false;
    // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
    CurrentSrcIdx = 2;
    const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
    SrcReg = MOInsertedReg.getReg();
    SrcSubReg = MOInsertedReg.getSubReg();
    const MachineOperand &MODef = CopyLike.getOperand(0);

    // We want to track something that is compatible with the
    // partial definition.
    TrackReg = MODef.getReg();
    if (MODef.getSubReg())
      // Bail out if we have to compose sub-register indices.
      return false;
    TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
    return true;
  }

  bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    if (CurrentSrcIdx != 2)
      return false;
    // We are rewriting the inserted reg.
    MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
    MO.setReg(NewReg);
    MO.setSubReg(NewSubReg);
    return true;
  }
};

/// \brief Specialized rewriter for EXTRACT_SUBREG instruction.
class ExtractSubregRewriter : public CopyRewriter {
  const TargetInstrInfo &TII;

public:
  ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
      : CopyRewriter(MI), TII(TII) {
    assert(MI.isExtractSubreg() && "Invalid instruction");
  }

  /// \brief See CopyRewriter::getNextRewritableSource.
  /// Here CopyLike has the following form:
  /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
  /// There is only one rewritable source: Src.subIdx,
  /// which defines dst.dstSubIdx.
  bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
                               unsigned &TrackReg,
                               unsigned &TrackSubReg) override {
    // If we already got the only source we can rewrite, return false.
    if (CurrentSrcIdx == 1)
      return false;
    // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
    CurrentSrcIdx = 1;
    const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
    SrcReg = MOExtractedReg.getReg();
    // If we have to compose sub-register indices, bail out.
    if (MOExtractedReg.getSubReg())
      return false;

    SrcSubReg = CopyLike.getOperand(2).getImm();

    // We want to track something that is compatible with the definition.
    const MachineOperand &MODef = CopyLike.getOperand(0);
    TrackReg = MODef.getReg();
    TrackSubReg = MODef.getSubReg();
    return true;
  }

  bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    // The only source we can rewrite is the input register.
    if (CurrentSrcIdx != 1)
      return false;

    CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);

    // If we find a source that does not require extracting anything,
    // rewrite the operation with a copy.
    if (!NewSubReg) {
      // Move the current index to an invalid position.
      // We do not want another call to this method to be able
      // to do any change.
      CurrentSrcIdx = -1;
      // Rewrite the operation as a COPY.
      // Get rid of the sub-register index.
      CopyLike.RemoveOperand(2);
      // Morph the operation into a COPY.
      CopyLike.setDesc(TII.get(TargetOpcode::COPY));
      return true;
    }
    CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
    return true;
  }
};

/// \brief Specialized rewriter for REG_SEQUENCE instruction.
class RegSequenceRewriter : public CopyRewriter {
public:
  RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) {
    assert(MI.isRegSequence() && "Invalid instruction");
  }

  /// \brief See CopyRewriter::getNextRewritableSource.
  /// Here CopyLike has the following form:
  /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
  /// Each call will return a different source, walking all the available
  /// sources.
  ///
  /// The first call returns:
  /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
  /// (TrackReg, TrackSubReg) = (dst, subIdx1).
  ///
  /// The second call returns:
  /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
  /// (TrackReg, TrackSubReg) = (dst, subIdx2).
  ///
  /// And so on, until all the sources have been traversed, then
  /// it returns false.
  bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
                               unsigned &TrackReg,
                               unsigned &TrackSubReg) override {
    // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.

    // If this is the first call, move to the first argument.
    if (CurrentSrcIdx == 0) {
      CurrentSrcIdx = 1;
    } else {
      // Otherwise, move to the next argument and check that it is valid.
      CurrentSrcIdx += 2;
      if (CurrentSrcIdx >= CopyLike.getNumOperands())
        return false;
    }
    const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
    SrcReg = MOInsertedReg.getReg();
    // If we have to compose sub-register indices, bail out.
    if ((SrcSubReg = MOInsertedReg.getSubReg()))
      return false;

    // We want to track something that is compatible with the related
    // partial definition.
    TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();

    const MachineOperand &MODef = CopyLike.getOperand(0);
    TrackReg = MODef.getReg();
    // If we have to compose sub-registers, bail out.
    return MODef.getSubReg() == 0;
  }

  bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    // We cannot rewrite out of bound operands.
    // Moreover, rewritable sources are at odd positions.
    if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
      return false;

    MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
    MO.setReg(NewReg);
    MO.setSubReg(NewSubReg);
    return true;
  }
};
} // End namespace.
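
// Illustrative example (hypothetical virtual registers; capital and lower
// case letters denote different register banks, as in the file header) of the
// kind of rewrite the rewriters above enable:
//   a = COPY A                      <-- cross-bank copy
//   B = REG_SEQUENCE a, sub0, ...   <-- 'a' is the rewritable source
// =>
//   a = COPY A                      <-- cross-bank copy
//   B = REG_SEQUENCE A, sub0, ...   <-- same-bank source
// The rewriters only expose and rewrite the operands; the driving loop is
// PeepholeOptimizer::optimizeCoalescableCopy below.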

/// \brief Get the appropriate CopyRewriter for \p MI.
/// \return A pointer to a dynamically allocated CopyRewriter or nullptr
/// if no rewriter works for \p MI.
static CopyRewriter *getCopyRewriter(MachineInstr &MI,
                                     const TargetInstrInfo &TII) {
  switch (MI.getOpcode()) {
  default:
    return nullptr;
  case TargetOpcode::COPY:
    return new CopyRewriter(MI);
  case TargetOpcode::INSERT_SUBREG:
    return new InsertSubregRewriter(MI);
  case TargetOpcode::EXTRACT_SUBREG:
    return new ExtractSubregRewriter(MI, TII);
  case TargetOpcode::REG_SEQUENCE:
    return new RegSequenceRewriter(MI);
  }
  llvm_unreachable(nullptr);
}

/// \brief Optimize generic copy instructions to avoid cross
/// register bank copies. The optimization looks through a chain of
/// copies and tries to find a source that has a compatible register
/// class.
/// Two register classes are considered to be compatible if they share
/// the same register bank.
/// New copies issued by this optimization are register allocator
/// friendly. This optimization does not remove any copy as it may
/// overconstrain the register allocator, but replaces some operands
/// when possible.
/// \pre isCoalescableCopy(*MI) is true.
/// \return True, when \p MI has been rewritten. False otherwise.
bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
  assert(MI && isCoalescableCopy(*MI) && "Invalid argument");
  assert(MI->getDesc().getNumDefs() == 1 &&
         "Coalescer can understand multiple defs?!");
  const MachineOperand &MODef = MI->getOperand(0);
  // Do not rewrite physical definitions.
  if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
    return false;

  bool Changed = false;
  // Get the right rewriter for the current copy.
  std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII));
  // If none exists, bail out.
  if (!CpyRewriter)
    return false;
  // Rewrite each rewritable source.
  unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
  while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
                                              TrackSubReg)) {
    unsigned NewSrc = TrackReg;
    unsigned NewSubReg = TrackSubReg;
    // Try to find a more suitable source.
    // If we failed to do so, or got the actual source,
    // move to the next source.
    if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc)
      continue;
    // Rewrite source.
    if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) {
      // We may have extended the live-range of NewSrc, account for that.
      MRI->clearKillFlags(NewSrc);
      Changed = true;
    }
  }
  // TODO: We could have a clean-up method to tidy the instruction.
  // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
  //    => v0 = COPY v1
  // Currently we haven't seen a motivating example for that and we
  // want to avoid untested code.
  NumRewrittenCopies += Changed;
  return Changed;
}

/// \brief Optimize copy-like instructions to create
/// register coalescer friendly instructions.
/// The optimization tries to kill off \p MI by looking
/// through a chain of copies to find a source that has a compatible
/// register class.
/// If such a source is found, it replaces \p MI with a generic COPY
/// operation.
/// \pre isUncoalescableCopy(*MI) is true.
/// \return True, when \p MI has been optimized. In that case, \p MI has
/// been removed from its parent.
/// All COPY instructions created are inserted in \p LocalMIs.
bool PeepholeOptimizer::optimizeUncoalescableCopy(
    MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
  assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");

  // Check if we can rewrite all the values defined by this instruction.
  SmallVector<
      std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>,
      4> RewritePairs;
  for (const MachineOperand &MODef : MI->defs()) {
    if (MODef.isDead())
      // We can ignore those.
      continue;

    // If a physical register is here, this is probably for a good reason.
    // Do not rewrite that.
    if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
      return false;

    // If we do not know how to rewrite this definition, there is no point
    // in trying to kill this instruction.
    TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg());
    TargetInstrInfo::RegSubRegPair Src = Def;
    if (!findNextSource(Src.Reg, Src.SubReg))
      return false;
    RewritePairs.push_back(std::make_pair(Def, Src));
  }
  // The change is possible for all defs, do it.
  for (const auto &PairDefSrc : RewritePairs) {
    const auto &Def = PairDefSrc.first;
    const auto &Src = PairDefSrc.second;
    // Rewrite the "copy" in a way the register coalescer understands.
    assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
           "We do not rewrite physical registers");
    const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
    unsigned NewVR = MRI->createVirtualRegister(DefRC);
    MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
                                    TII->get(TargetOpcode::COPY),
                                    NewVR).addReg(Src.Reg, 0, Src.SubReg);
    NewCopy->getOperand(0).setSubReg(Def.SubReg);
    if (Def.SubReg)
      NewCopy->getOperand(0).setIsUndef();
    LocalMIs.insert(NewCopy);
    MRI->replaceRegWith(Def.Reg, NewVR);
    MRI->clearKillFlags(NewVR);
    // We extended the lifetime of Src.
    // Clear the kill flags to account for that.
    MRI->clearKillFlags(Src.Reg);
  }
  // MI is now dead.
  MI->eraseFromParent();
  ++NumUncoalescableCopies;
  return true;
}

/// isLoadFoldable - Check whether MI is a candidate for folding into a later
/// instruction. We only fold loads into virtual registers, and only when the
/// defined virtual register has a single use.
bool PeepholeOptimizer::isLoadFoldable(
    MachineInstr *MI,
    SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
  if (!MI->canFoldAsLoad() || !MI->mayLoad())
    return false;
  const MCInstrDesc &MCID = MI->getDesc();
  if (MCID.getNumDefs() != 1)
    return false;

  unsigned Reg = MI->getOperand(0).getReg();
  // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
  // loads. It should be checked when processing uses of the load, since
  // uses can be removed during peephole.
  if (!MI->getOperand(0).getSubReg() &&
      TargetRegisterInfo::isVirtualRegister(Reg) &&
      MRI->hasOneNonDBGUse(Reg)) {
    FoldAsLoadDefCandidates.insert(Reg);
    return true;
  }
  return false;
}

bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
                                        SmallSet<unsigned, 4> &ImmDefRegs,
                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
  const MCInstrDesc &MCID = MI->getDesc();
  if (!MI->isMoveImmediate())
    return false;
  if (MCID.getNumDefs() != 1)
    return false;
  unsigned Reg = MI->getOperand(0).getReg();
  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    ImmDefMIs.insert(std::make_pair(Reg, MI));
    ImmDefRegs.insert(Reg);
    return true;
  }

  return false;
}

/// foldImmediate - Try folding register operands that are defined by move
/// immediate instructions, i.e. a trivial constant folding optimization, if
/// and only if the def and use are in the same BB.
bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
                                      SmallSet<unsigned, 4> &ImmDefRegs,
                                 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isDef())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    if (ImmDefRegs.count(Reg) == 0)
      continue;
    DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
    assert(II != ImmDefMIs.end());
    if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
      ++NumImmFold;
      return true;
    }
  }
  return false;
}

bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
  DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');

  if (DisablePeephole)
    return false;

  TII = MF.getSubtarget().getInstrInfo();
  TRI = MF.getSubtarget().getRegisterInfo();
  MRI = &MF.getRegInfo();
  DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;

  bool Changed = false;

  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    MachineBasicBlock *MBB = &*I;

    bool SeenMoveImm = false;

    // During this forward scan, at some point it needs to answer the question
    // "given a pointer to an MI in the current BB, is it located before or
    // after the current instruction?".
    // To perform this, the following set keeps track of the MIs already seen
    // during the scan; if an MI is not in the set, it is assumed to be located
    // after. Newly created MIs have to be inserted in the set as well.
    SmallPtrSet<MachineInstr*, 16> LocalMIs;
    SmallSet<unsigned, 4> ImmDefRegs;
    DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    SmallSet<unsigned, 16> FoldAsLoadDefCandidates;

    for (MachineBasicBlock::iterator
           MII = I->begin(), MIE = I->end(); MII != MIE; ) {
      MachineInstr *MI = &*MII;
      // We may be erasing MI below, increment MII now.
      ++MII;
      LocalMIs.insert(MI);

      // Skip debug values. They should not affect this peephole optimization.
      if (MI->isDebugValue())
        continue;

      // If there exists an instruction which belongs to the following
      // categories, we will discard the load candidates.
      if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
          MI->isKill() || MI->isInlineAsm() ||
          MI->hasUnmodeledSideEffects()) {
        FoldAsLoadDefCandidates.clear();
        continue;
      }
      if (MI->mayStore() || MI->isCall())
        FoldAsLoadDefCandidates.clear();

      if ((isUncoalescableCopy(*MI) &&
           optimizeUncoalescableCopy(MI, LocalMIs)) ||
          (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
          (MI->isSelect() && optimizeSelect(MI, LocalMIs))) {
        // MI is deleted.
        LocalMIs.erase(MI);
        Changed = true;
        continue;
      }

      if (MI->isConditionalBranch() && optimizeCondBranch(MI)) {
        Changed = true;
        continue;
      }

      if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) {
        // MI is just rewritten.
        Changed = true;
        continue;
      }

      if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
        SeenMoveImm = true;
      } else {
        Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
        // optimizeExtInstr might have created new instructions after MI
        // and before the already incremented MII. Adjust MII so that the
        // next iteration sees the new instructions.
        MII = MI;
        ++MII;
        if (SeenMoveImm)
          Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
      }

      // Check whether MI is a load candidate for folding into a later
      // instruction. If MI is not a candidate, check whether we can fold an
      // earlier load into MI.
      if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
          !FoldAsLoadDefCandidates.empty()) {
        const MCInstrDesc &MIDesc = MI->getDesc();
        for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
             ++i) {
          const MachineOperand &MOp = MI->getOperand(i);
          if (!MOp.isReg())
            continue;
          unsigned FoldAsLoadDefReg = MOp.getReg();
          if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
            // We need to fold load after optimizeCmpInstr, since
            // optimizeCmpInstr can enable folding by converting SUB to CMP.
            // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
            // we need it for markUsesInDebugValueAsUndef().
            unsigned FoldedReg = FoldAsLoadDefReg;
            MachineInstr *DefMI = nullptr;
            MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
                                                          FoldAsLoadDefReg,
                                                          DefMI);
            if (FoldMI) {
              // Update LocalMIs since we replaced MI with FoldMI and deleted
              // DefMI.
              DEBUG(dbgs() << "Replacing: " << *MI);
              DEBUG(dbgs() << "     With: " << *FoldMI);
              LocalMIs.erase(MI);
              LocalMIs.erase(DefMI);
              LocalMIs.insert(FoldMI);
              MI->eraseFromParent();
              DefMI->eraseFromParent();
              MRI->markUsesInDebugValueAsUndef(FoldedReg);
              FoldAsLoadDefCandidates.erase(FoldedReg);
              ++NumLoadFold;
              // MI is replaced with FoldMI.
              Changed = true;
              break;
            }
          }
        }
      }
    }
  }

  return Changed;
}

bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg,
                                         unsigned &SrcSubReg) {
  assert(Def->isCopy() && "Invalid definition");
  // Copy instructions are supposed to be: Def = Src.
  // If someone breaks this assumption, bad things will happen everywhere.
  assert(Def->getNumOperands() == 2 && "Invalid number of operands");

  if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
    // If we look for a different subreg, it means we want a subreg of src.
    // Bail out as we do not support composing subregs yet.
    return false;
  // Otherwise, we want the whole source.
  const MachineOperand &Src = Def->getOperand(1);
  SrcReg = Src.getReg();
  SrcSubReg = Src.getSubReg();
  return true;
}

bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg,
                                            unsigned &SrcSubReg) {
  assert(Def->isBitcast() && "Invalid definition");

  // Bail if there are effects that a plain copy will not expose.
  if (Def->hasUnmodeledSideEffects())
    return false;

  // Bitcasts with more than one def are not supported.
  if (Def->getDesc().getNumDefs() != 1)
    return false;
  if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
    // If we look for a different subreg, it means we want a subreg of the src.
    // Bail out as we do not support composing subregs yet.
    return false;

  unsigned SrcIdx = Def->getNumOperands();
  for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
       ++OpIdx) {
    const MachineOperand &MO = Def->getOperand(OpIdx);
    if (!MO.isReg() || !MO.getReg())
      continue;
    assert(!MO.isDef() && "We should have skipped all the definitions by now");
    if (SrcIdx != EndOpIdx)
      // Multiple sources?
      return false;
    SrcIdx = OpIdx;
  }
  const MachineOperand &Src = Def->getOperand(SrcIdx);
  SrcReg = Src.getReg();
  SrcSubReg = Src.getSubReg();
  return true;
}

bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
                                                unsigned &SrcSubReg) {
  assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
         "Invalid definition");

  if (Def->getOperand(DefIdx).getSubReg())
    // If we are composing subregs, bail out.
    // The case we are checking is Def.<subreg> = REG_SEQUENCE.
    // This should almost never happen as the SSA property is tracked at
    // the register level (as opposed to the subreg level).
    // I.e.,
    // Def.sub0 =
    // Def.sub1 =
    // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
    // Def. Thus, it must not be generated.
    // However, some code could theoretically generate a single
    // Def.sub0 (i.e., not defining the other subregs) and we would
    // have this case.
    // If we can ascertain (or force) that this never happens, we could
    // turn that into an assertion.
    return false;

  if (!TII)
    // We could handle the REG_SEQUENCE here, but we do not want to
    // duplicate the code from the generic TII.
    return false;

  SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs;
  if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
    return false;

  // We are looking at:
  //   Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
  // Check if one of the operands defines the subreg we are interested in.
  for (auto &RegSeqInput : RegSeqInputRegs) {
    if (RegSeqInput.SubIdx == DefSubReg) {
      if (RegSeqInput.SubReg)
        // Bail out if we have to compose sub registers.
        return false;

      SrcReg = RegSeqInput.Reg;
      SrcSubReg = RegSeqInput.SubReg;
      return true;
    }
  }

  // If the subreg we are tracking is super-defined by another subreg,
  // we could follow this value. However, this would require composing
  // the subregs, and we do not do that for now.
  return false;
}

bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
                                                 unsigned &SrcSubReg) {
  assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
         "Invalid definition");

  if (Def->getOperand(DefIdx).getSubReg())
    // If we are composing subregs, bail out.
    // Same remark as getNextSourceFromRegSequence.
    // I.e., this may be turned into an assert.
    return false;

  if (!TII)
    // We could handle the INSERT_SUBREG here, but we do not want to
    // duplicate the code from the generic TII.
    return false;

  TargetInstrInfo::RegSubRegPair BaseReg;
  TargetInstrInfo::RegSubRegPairAndIdx InsertedReg;
  if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
    return false;

  // We are looking at:
  //   Def = INSERT_SUBREG v0, v1, sub1
  // There are two cases:
  // 1. DefSubReg == sub1, get v1.
  // 2. DefSubReg != sub1, the value may be available through v0.

  // #1 Check if the inserted register matches the required sub index.
  if (InsertedReg.SubIdx == DefSubReg) {
    SrcReg = InsertedReg.Reg;
    SrcSubReg = InsertedReg.SubReg;
    return true;
  }
  // #2 Otherwise, if the sub register we are looking for is not partially
  // defined by the inserted element, we can look through the main
  // register (v0).
  const MachineOperand &MODef = Def->getOperand(DefIdx);
  // If the result register (Def) and the base register (v0) do not
  // have the same register class or if we have to compose
  // subregisters, bail out.
  if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
      BaseReg.SubReg)
    return false;

  // Get the TRI and check if the inserted sub-register overlaps with the
  // sub-register we are tracking.
  const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
  if (!TRI ||
      (TRI->getSubRegIndexLaneMask(DefSubReg) &
       TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
    return false;
  // At this point, the value is available in v0 via the same subreg
  // we used for Def.
  SrcReg = BaseReg.Reg;
  SrcSubReg = DefSubReg;
  return true;
}

bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg,
                                                  unsigned &SrcSubReg) {
  assert((Def->isExtractSubreg() ||
          Def->isExtractSubregLike()) && "Invalid definition");
  // We are looking at:
  //   Def = EXTRACT_SUBREG v0, sub0

  // Bail out if we have to compose sub registers.
  // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
  if (DefSubReg)
    return false;

  if (!TII)
    // We could handle the EXTRACT_SUBREG here, but we do not want to
    // duplicate the code from the generic TII.
    return false;

  TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg;
  if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
    return false;

  // Bail out if we have to compose sub registers.
  // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
  if (ExtractSubregInputReg.SubReg)
    return false;
  // Otherwise, the value is available in v0.sub0.
  SrcReg = ExtractSubregInputReg.Reg;
  SrcSubReg = ExtractSubregInputReg.SubIdx;
  return true;
}

bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg,
                                                unsigned &SrcSubReg) {
  assert(Def->isSubregToReg() && "Invalid definition");
  // We are looking at:
  //   Def = SUBREG_TO_REG Imm, v0, sub0

  // Bail out if we have to compose sub registers.
  // If DefSubReg != sub0, we would have to check that all the bits
  // we track are included in sub0 and if yes, we would have to
  // determine the right subreg in v0.
  if (DefSubReg != Def->getOperand(3).getImm())
    return false;
  // Bail out if we have to compose sub registers.
  // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
  if (Def->getOperand(2).getSubReg())
    return false;

  SrcReg = Def->getOperand(2).getReg();
  SrcSubReg = Def->getOperand(3).getImm();
  return true;
}

bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) {
  assert(Def && "This method needs a valid definition");

  assert(
      (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
      Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
  if (Def->isCopy())
    return getNextSourceFromCopy(SrcReg, SrcSubReg);
  if (Def->isBitcast())
    return getNextSourceFromBitcast(SrcReg, SrcSubReg);
  // All the remaining cases involve "complex" instructions.
  // Bail out if we did not ask for advanced tracking.
  if (!UseAdvancedTracking)
    return false;
  if (Def->isRegSequence() || Def->isRegSequenceLike())
    return getNextSourceFromRegSequence(SrcReg, SrcSubReg);
  if (Def->isInsertSubreg() || Def->isInsertSubregLike())
    return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg);
  if (Def->isExtractSubreg() || Def->isExtractSubregLike())
    return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg);
  if (Def->isSubregToReg())
    return getNextSourceFromSubregToReg(SrcReg, SrcSubReg);
  return false;
}

const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg,
                                                unsigned &SrcSubReg) {
  // If we reach a point where we cannot move up in the use-def chain,
  // there is nothing we can get.
  if (!Def)
    return nullptr;

  const MachineInstr *PrevDef = nullptr;
  // Try to find the next source.
  if (getNextSourceImpl(SrcReg, SrcSubReg)) {
    // Update definition, definition index, and subregister for the
    // next call of getNextSource.
    // Update the current register.
    Reg = SrcReg;
    // Update the return value before moving up in the use-def chain.
    PrevDef = Def;
    // If we can still move up in the use-def chain, move to the next
    // definition.
    if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
      Def = MRI.getVRegDef(Reg);
      DefIdx = MRI.def_begin(Reg).getOperandNo();
      DefSubReg = SrcSubReg;
      return PrevDef;
    }
  }
  // If we end up here, this means we will not be able to find another source
  // for the next iteration.
  // Make sure any new call to getNextSource bails out early by cutting the
  // use-def chain.
  Def = nullptr;
  return PrevDef;
}