1 //===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 /// The pass tries to use the 32-bit encoding for instructions when possible. 9 //===----------------------------------------------------------------------===// 10 // 11 12 #include "AMDGPU.h" 13 #include "AMDGPUSubtarget.h" 14 #include "SIInstrInfo.h" 15 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/CodeGen/MachineFunctionPass.h" 18 #include "llvm/CodeGen/MachineInstrBuilder.h" 19 #include "llvm/CodeGen/MachineRegisterInfo.h" 20 #include "llvm/IR/Constants.h" 21 #include "llvm/IR/Function.h" 22 #include "llvm/IR/LLVMContext.h" 23 #include "llvm/Support/Debug.h" 24 #include "llvm/Support/raw_ostream.h" 25 #include "llvm/Target/TargetMachine.h" 26 27 #define DEBUG_TYPE "si-shrink-instructions" 28 29 STATISTIC(NumInstructionsShrunk, 30 "Number of 64-bit instruction reduced to 32-bit."); 31 STATISTIC(NumLiteralConstantsFolded, 32 "Number of literal constants folded into 32-bit instructions."); 33 34 using namespace llvm; 35 36 namespace { 37 38 class SIShrinkInstructions : public MachineFunctionPass { 39 public: 40 static char ID; 41 42 public: 43 SIShrinkInstructions() : MachineFunctionPass(ID) { 44 } 45 46 bool runOnMachineFunction(MachineFunction &MF) override; 47 48 StringRef getPassName() const override { return "SI Shrink Instructions"; } 49 50 void getAnalysisUsage(AnalysisUsage &AU) const override { 51 AU.setPreservesCFG(); 52 MachineFunctionPass::getAnalysisUsage(AU); 53 } 54 }; 55 56 } // End anonymous namespace. 
INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
                "SI Shrink Instructions", false, false)

char SIShrinkInstructions::ID = 0;

// Factory entry point used by the AMDGPU target to add this pass to the
// codegen pipeline.
FunctionPass *llvm::createSIShrinkInstructionsPass() {
  return new SIShrinkInstructions();
}

/// This function checks \p MI for operands defined by a move immediate
/// instruction and then folds the literal constant into the instruction if it
/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instructions.
///
/// \param TryToCommute when true (the default), if src0 cannot be folded the
///        instruction is commuted once and folding is retried; on failure the
///        commute is undone so \p MI is left in its original operand order.
/// \returns true if a literal (or frame index) was folded into \p MI; the
///          defining move is erased in that case.
static bool foldImmediates(MachineInstr &MI, const SIInstrInfo *TII,
                           MachineRegisterInfo &MRI, bool TryToCommute = true) {
  assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));

  int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);

  // Try to fold Src0
  MachineOperand &Src0 = MI.getOperand(Src0Idx);
  if (Src0.isReg()) {
    unsigned Reg = Src0.getReg();
    // Only fold when this is the register's sole use; otherwise the defining
    // move must stay and folding would not let us erase it below.
    if (TargetRegisterInfo::isVirtualRegister(Reg) && MRI.hasOneUse(Reg)) {
      MachineInstr *Def = MRI.getUniqueVRegDef(Reg);
      if (Def && Def->isMoveImmediate()) {
        MachineOperand &MovSrc = Def->getOperand(1);
        bool ConstantFolded = false;

        // Fold a plain immediate only if it fits in 32 bits (signed or
        // unsigned view) so it is representable as a literal operand.
        if (MovSrc.isImm() && (isInt<32>(MovSrc.getImm()) ||
                               isUInt<32>(MovSrc.getImm()))) {
          // It's possible to have only one component of a super-reg defined by
          // a single mov, so we need to clear any subregister flag.
          Src0.setSubReg(0);
          Src0.ChangeToImmediate(MovSrc.getImm());
          ConstantFolded = true;
        } else if (MovSrc.isFI()) {
          // Frame indices fold the same way as immediates.
          Src0.setSubReg(0);
          Src0.ChangeToFrameIndex(MovSrc.getIndex());
          ConstantFolded = true;
        }

        if (ConstantFolded) {
          // The single use was just rewritten, so the def is now dead.
          assert(MRI.use_empty(Reg));
          Def->eraseFromParent();
          ++NumLiteralConstantsFolded;
          return true;
        }
      }
    }
  }

  // We have failed to fold src0, so commute the instruction and try again.
  if (TryToCommute && MI.isCommutable()) {
    if (TII->commuteInstruction(MI)) {
      // Recurse with TryToCommute=false to avoid commuting forever.
      if (foldImmediates(MI, TII, MRI, false))
        return true;

      // Commute back.
      TII->commuteInstruction(MI);
    }
  }

  return false;
}

/// \returns true if \p Src is a signed 16-bit literal that is not already
/// representable as an inline constant (i.e. it would need a KImm encoding).
static bool isKImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
  return isInt<16>(Src.getImm()) &&
    !TII->isInlineConstant(*Src.getParent(),
                           Src.getParent()->getOperandNo(&Src));
}

/// \returns true if \p Src is an unsigned 16-bit literal that is not already
/// representable as an inline constant.
static bool isKUImmOperand(const SIInstrInfo *TII, const MachineOperand &Src) {
  return isUInt<16>(Src.getImm()) &&
    !TII->isInlineConstant(*Src.getParent(),
                           Src.getParent()->getOperandNo(&Src));
}

/// \returns true if \p Src fits in 16 bits either signed or unsigned and is
/// not an inline constant. \p IsUnsigned reports which interpretation was
/// used (signed takes precedence when both apply).
static bool isKImmOrKUImmOperand(const SIInstrInfo *TII,
                                 const MachineOperand &Src,
                                 bool &IsUnsigned) {
  if (isInt<16>(Src.getImm())) {
    IsUnsigned = false;
    return !TII->isInlineConstant(Src);
  }

  if (isUInt<16>(Src.getImm())) {
    IsUnsigned = true;
    return !TII->isInlineConstant(Src);
  }

  return false;
}

/// \returns true if the constant in \p Src should be replaced with a bitreverse
/// of an inline immediate.
static bool isReverseInlineImm(const SIInstrInfo *TII,
                               const MachineOperand &Src,
                               int32_t &ReverseImm) {
  // Only 32-bit literals that are not already inline constants qualify.
  if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
    return false;

  // The bit-reversed value must land in the inline-constant range [-16, 64].
  ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
  return ReverseImm >= -16 && ReverseImm <= 64;
}

/// Copy implicit register operands from specified instruction to this
/// instruction that are not part of the instruction definition.
static void copyExtraImplicitOps(MachineInstr &NewMI, MachineFunction &MF,
                                 const MachineInstr &MI) {
  // Skip past all operands declared by the MCInstrDesc (explicit operands
  // plus the declared implicit uses and defs); anything beyond that was
  // attached after the fact and must be carried over to NewMI.
  for (unsigned i = MI.getDesc().getNumOperands() +
         MI.getDesc().getNumImplicitUses() +
         MI.getDesc().getNumImplicitDefs(), e = MI.getNumOperands();
       i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    // Only implicit register operands and register masks are copied.
    if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
      NewMI.addOperand(MF, MO);
  }
}

/// Rewrite a scalar compare \p MI to its shorter SOPK form in place (by
/// swapping its MCInstrDesc) when the constant side fits the 16-bit
/// immediate field and is not an inline constant. Leaves \p MI untouched
/// when no profitable SOPK form exists.
static void shrinkScalarCompare(const SIInstrInfo *TII, MachineInstr &MI) {
  // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
  // get constants on the RHS.
  if (!MI.getOperand(0).isReg())
    TII->commuteInstruction(MI, false, 0, 1);

  const MachineOperand &Src1 = MI.getOperand(1);
  // If the RHS still isn't an immediate (e.g. the commute failed or both
  // operands are registers), there is nothing to shrink.
  if (!Src1.isImm())
    return;

  int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
  if (SOPKOpc == -1)
    return;

  // eq/ne is special because the imm16 can be treated as signed or unsigned,
  // and initially selected to the unsigned versions.
  if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
    bool HasUImm;
    if (isKImmOrKUImmOperand(TII, Src1, HasUImm)) {
      // A signed-only immediate needs the signed eq/ne variant instead.
      if (!HasUImm) {
        SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
          AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
      }

      MI.setDesc(TII->get(SOPKOpc));
    }

    return;
  }

  const MCInstrDesc &NewDesc = TII->get(SOPKOpc);

  // Match the immediate's signedness to the SOPK opcode's extension kind:
  // zero-extending opcodes take unsigned imm16, the rest take signed imm16.
  if ((TII->sopkIsZext(SOPKOpc) && isKUImmOperand(TII, Src1)) ||
      (!TII->sopkIsZext(SOPKOpc) && isKImmOperand(TII, Src1))) {
    MI.setDesc(NewDesc);
  }
}

// This is the same as MachineInstr::readsRegister/modifiesRegister except
// it takes subregs into account.
217 static bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R, 218 unsigned Reg, unsigned SubReg, 219 const SIRegisterInfo &TRI) { 220 for (const MachineOperand &MO : R) { 221 if (!MO.isReg()) 222 continue; 223 224 if (TargetRegisterInfo::isPhysicalRegister(Reg) && 225 TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 226 if (TRI.regsOverlap(Reg, MO.getReg())) 227 return true; 228 } else if (MO.getReg() == Reg && 229 TargetRegisterInfo::isVirtualRegister(Reg)) { 230 LaneBitmask Overlap = TRI.getSubRegIndexLaneMask(SubReg) & 231 TRI.getSubRegIndexLaneMask(MO.getSubReg()); 232 if (Overlap.any()) 233 return true; 234 } 235 } 236 return false; 237 } 238 239 static bool instReadsReg(const MachineInstr *MI, 240 unsigned Reg, unsigned SubReg, 241 const SIRegisterInfo &TRI) { 242 return instAccessReg(MI->uses(), Reg, SubReg, TRI); 243 } 244 245 static bool instModifiesReg(const MachineInstr *MI, 246 unsigned Reg, unsigned SubReg, 247 const SIRegisterInfo &TRI) { 248 return instAccessReg(MI->defs(), Reg, SubReg, TRI); 249 } 250 251 static TargetInstrInfo::RegSubRegPair 252 getSubRegForIndex(unsigned Reg, unsigned Sub, unsigned I, 253 const SIRegisterInfo &TRI, const MachineRegisterInfo &MRI) { 254 if (TRI.getRegSizeInBits(Reg, MRI) != 32) { 255 if (TargetRegisterInfo::isPhysicalRegister(Reg)) { 256 Reg = TRI.getSubReg(Reg, TRI.getSubRegFromChannel(I)); 257 } else { 258 LaneBitmask LM = TRI.getSubRegIndexLaneMask(Sub); 259 Sub = TRI.getSubRegFromChannel(I + countTrailingZeros(LM.getAsInteger())); 260 } 261 } 262 return TargetInstrInfo::RegSubRegPair(Reg, Sub); 263 } 264 265 // Match: 266 // mov t, x 267 // mov x, y 268 // mov y, t 269 // 270 // => 271 // 272 // mov t, x (t is potentially dead and move eliminated) 273 // v_swap_b32 x, y 274 // 275 // Returns next valid instruction pointer if was able to create v_swap_b32. 
//
// This shall not be done too early not to prevent possible folding which may
// remove matched moves, and this should preferably be done before RA to
// release saved registers and also possibly after RA which can insert copies
// too.
//
// This is really just a generic peephole that is not a canonical shrinking,
// although requirements match the pass placement and it reduces code size too.
static MachineInstr* matchSwap(MachineInstr &MovT, MachineRegisterInfo &MRI,
                               const SIInstrInfo *TII) {
  assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
         MovT.getOpcode() == AMDGPU::COPY);

  unsigned T = MovT.getOperand(0).getReg();
  unsigned Tsub = MovT.getOperand(0).getSubReg();
  MachineOperand &Xop = MovT.getOperand(1);

  if (!Xop.isReg())
    return nullptr;
  unsigned X = Xop.getReg();
  unsigned Xsub = Xop.getSubReg();

  // Number of 32-bit components being moved (v_swap_b32 swaps 32 bits at a
  // time, so wider copies are expanded into Size swaps below).
  unsigned Size = TII->getOpSize(MovT, 0) / 4;

  const SIRegisterInfo &TRI = TII->getRegisterInfo();
  if (!TRI.isVGPR(MRI, X))
    return nullptr;

  // Look for a candidate "mov y, t" among the (non-debug) uses of T.
  for (MachineOperand &YTop : MRI.use_nodbg_operands(T)) {
    if (YTop.getSubReg() != Tsub)
      continue;

    MachineInstr &MovY = *YTop.getParent();
    // NOTE(review): getOperand(1) is read here on the assumption that the use
    // of T in a mov/copy is its source operand — confirm this holds for any
    // implicit uses on these opcodes.
    if ((MovY.getOpcode() != AMDGPU::V_MOV_B32_e32 &&
         MovY.getOpcode() != AMDGPU::COPY) ||
        MovY.getOperand(1).getSubReg() != Tsub)
      continue;

    unsigned Y = MovY.getOperand(0).getReg();
    unsigned Ysub = MovY.getOperand(0).getSubReg();

    // Only handle VGPR targets within a single basic block.
    if (!TRI.isVGPR(MRI, Y) || MovT.getParent() != MovY.getParent())
      continue;

    // Scan the instructions between MovT and MovY: exactly one "mov x, y"
    // (MovX) may appear, and nothing in between may read X or clobber
    // X, Y, or T in a way that would break the swap.
    MachineInstr *MovX = nullptr;
    auto I = std::next(MovT.getIterator()), E = MovT.getParent()->instr_end();
    for (auto IY = MovY.getIterator(); I != E && I != IY; ++I) {
      // Any read of X before its redefinition, or any clobber of Y or T,
      // invalidates the pattern.
      if (instReadsReg(&*I, X, Xsub, TRI) ||
          instModifiesReg(&*I, Y, Ysub, TRI) ||
          instModifiesReg(&*I, T, Tsub, TRI) ||
          (MovX && instModifiesReg(&*I, X, Xsub, TRI))) {
        MovX = nullptr;
        break;
      }
      if (!instReadsReg(&*I, Y, Ysub, TRI)) {
        // An instruction that writes X without reading Y cannot be the
        // "mov x, y" we are after; give up.
        if (!MovX && instModifiesReg(&*I, X, Xsub, TRI)) {
          MovX = nullptr;
          break;
        }
        continue;
      }
      // Reads Y: it must be the single "mov x, y", with exact subreg match.
      if (MovX ||
          (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
           I->getOpcode() != AMDGPU::COPY) ||
          I->getOperand(0).getReg() != X ||
          I->getOperand(0).getSubReg() != Xsub) {
        MovX = nullptr;
        break;
      }
      MovX = &*I;
    }

    if (!MovX || I == E)
      continue;

    LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << MovY);

    // Emit one v_swap_b32 per 32-bit component at MovX's position.
    for (unsigned I = 0; I < Size; ++I) {
      TargetInstrInfo::RegSubRegPair X1, Y1;
      X1 = getSubRegForIndex(X, Xsub, I, TRI, MRI);
      Y1 = getSubRegForIndex(Y, Ysub, I, TRI, MRI);
      BuildMI(*MovT.getParent(), MovX->getIterator(), MovT.getDebugLoc(),
              TII->get(AMDGPU::V_SWAP_B32))
        .addDef(X1.Reg, 0, X1.SubReg)
        .addDef(Y1.Reg, 0, Y1.SubReg)
        .addReg(Y1.Reg, 0, Y1.SubReg)
        .addReg(X1.Reg, 0, X1.SubReg).getInstr();
    }
    MovX->eraseFromParent();
    MovY.eraseFromParent();
    MachineInstr *Next = &*std::next(MovT.getIterator());
    // Keep "mov t, x" only if T still has other uses; if it stays, X is no
    // longer killed there because the swap now reads it afterwards.
    if (MRI.use_nodbg_empty(T))
      MovT.eraseFromParent();
    else
      Xop.setIsKill(false);

    return Next;
  }

  return nullptr;
}

/// Walk every instruction and apply, in order: V_MOV literal → V_BFREV of an
/// inline constant; mov/mov/mov → v_swap_b32; adjacent s_nop merging;
/// S_ADD/S_MUL → S_ADDK/S_MULK (plus regalloc hints); SOPC → SOPK compares;
/// S_MOV_B32 → S_MOVK_I32 / S_BREV_B32; and finally 64-bit VALU → 32-bit
/// encodings (with VCC hints when the e32 form constrains operands to VCC).
/// \returns false: instructions are rewritten in place but the pass reports
/// no analysis-relevant change through the return value.
bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  MachineRegisterInfo &MRI = MF.getRegInfo();
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  const SIInstrInfo *TII = ST.getInstrInfo();

  // NOTE(review): I1Defs is never populated or read in this function.
  std::vector<unsigned> I1Defs;

  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
                                                  BI != BE; ++BI) {

    MachineBasicBlock &MBB = *BI;
    MachineBasicBlock::iterator I, Next;
    // Next is captured before any transformation so erasing MI stays safe.
    for (I = MBB.begin(); I != MBB.end(); I = Next) {
      Next = std::next(I);
      MachineInstr &MI = *I;

      if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
        // If this has a literal constant source that is the same as the
        // reversed bits of an inline immediate, replace with a bitreverse of
        // that constant. This saves 4 bytes in the common case of materializing
        // sign bits.

        // Test if we are after regalloc. We only want to do this after any
        // optimizations happen because this will confuse them.
        // XXX - not exactly a check for post-regalloc run.
        MachineOperand &Src = MI.getOperand(1);
        if (Src.isImm() &&
            TargetRegisterInfo::isPhysicalRegister(MI.getOperand(0).getReg())) {
          int32_t ReverseImm;
          if (isReverseInlineImm(TII, Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
            Src.setImm(ReverseImm);
            continue;
          }
        }
      }

      // Try to fold a mov-triangle into v_swap_b32 on subtargets that have it.
      if (ST.hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
                           MI.getOpcode() == AMDGPU::COPY)) {
        if (auto *NextMI = matchSwap(MI, MRI, TII)) {
          // matchSwap may have erased instructions; resume after its result.
          Next = NextMI->getIterator();
          continue;
        }
      }

      // Combine adjacent s_nops to use the immediate operand encoding how long
      // to wait.
      //
      // s_nop N
      // s_nop M
      // =>
      // s_nop (N + M)
      if (MI.getOpcode() == AMDGPU::S_NOP &&
          Next != MBB.end() &&
          (*Next).getOpcode() == AMDGPU::S_NOP) {

        MachineInstr &NextMI = *Next;
        // The instruction encodes the amount to wait with an offset of 1,
        // i.e. 0 is wait 1 cycle. Convert both to cycles and then convert back
        // after adding.
        uint8_t Nop0 = MI.getOperand(0).getImm() + 1;
        uint8_t Nop1 = NextMI.getOperand(0).getImm() + 1;

        // Make sure we don't overflow the bounds.
        if (Nop0 + Nop1 <= 8) {
          NextMI.getOperand(0).setImm(Nop0 + Nop1 - 1);
          MI.eraseFromParent();
        }

        continue;
      }

      // FIXME: We also need to consider movs of constant operands since
      // immediate operands are not folded if they have more than one use, and
      // the operand folding pass is unaware if the immediate will be free since
      // it won't know if the src == dest constraint will end up being
      // satisfied.
      if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
          MI.getOpcode() == AMDGPU::S_MUL_I32) {
        const MachineOperand *Dest = &MI.getOperand(0);
        MachineOperand *Src0 = &MI.getOperand(1);
        MachineOperand *Src1 = &MI.getOperand(2);

        // Canonicalize the register operand into src0 so the ADDK/MULK
        // src==dst tie below can apply.
        if (!Src0->isReg() && Src1->isReg()) {
          if (TII->commuteInstruction(MI, false, 1, 2))
            std::swap(Src0, Src1);
        }

        // FIXME: This could work better if hints worked with subregisters. If
        // we have a vector add of a constant, we usually don't get the correct
        // allocation due to the subregister usage.
        if (TargetRegisterInfo::isVirtualRegister(Dest->getReg()) &&
            Src0->isReg()) {
          MRI.setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
          MRI.setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
          continue;
        }

        // Post-RA case: src0 already equals dst, so the SOPK form is legal.
        if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
          if (Src1->isImm() && isKImmOperand(TII, *Src1)) {
            unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
              AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;

            MI.setDesc(TII->get(Opc));
            MI.tieOperands(0, 1);
          }
        }
      }

      // Try to use s_cmpk_*
      if (MI.isCompare() && TII->isSOPC(MI)) {
        shrinkScalarCompare(TII, MI);
        continue;
      }

      // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
      if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
        const MachineOperand &Dst = MI.getOperand(0);
        MachineOperand &Src = MI.getOperand(1);

        if (Src.isImm() &&
            TargetRegisterInfo::isPhysicalRegister(Dst.getReg())) {
          int32_t ReverseImm;
          if (isKImmOperand(TII, Src))
            MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
          else if (isReverseInlineImm(TII, Src, ReverseImm)) {
            MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
            Src.setImm(ReverseImm);
          }
        }

        continue;
      }

      // Everything below handles shrinking 64-bit VALU encodings to 32-bit.
      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      if (!TII->canShrink(MI, MRI)) {
        // Try commuting the instruction and see if that enables us to shrink
        // it.
        if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
            !TII->canShrink(MI, MRI))
          continue;
      }

      // getVOPe32 could be -1 here if we started with an instruction that had
      // a 32-bit encoding and then commuted it to an instruction that did not.
      if (!TII->hasVALU32BitEncoding(MI.getOpcode()))
        continue;

      int Op32 = AMDGPU::getVOPe32(MI.getOpcode());

      if (TII->isVOPC(Op32)) {
        unsigned DstReg = MI.getOperand(0).getReg();
        if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
          // VOPC instructions can only write to the VCC register. We can't
          // force them to use VCC here, because this is only one register and
          // cannot deal with sequences which would require multiple copies of
          // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
          //
          // So, instead of forcing the instruction to write to VCC, we provide
          // a hint to the register allocator to use VCC and then we will run
          // this pass again after RA and shrink it if it outputs to VCC.
          MRI.setRegAllocationHint(MI.getOperand(0).getReg(), 0, AMDGPU::VCC);
          continue;
        }
        if (DstReg != AMDGPU::VCC)
          continue;
      }

      if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
        // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
        // instructions.
        const MachineOperand *Src2 =
            TII->getNamedOperand(MI, AMDGPU::OpName::src2);
        if (!Src2->isReg())
          continue;
        unsigned SReg = Src2->getReg();
        if (TargetRegisterInfo::isVirtualRegister(SReg)) {
          MRI.setRegAllocationHint(SReg, 0, AMDGPU::VCC);
          continue;
        }
        if (SReg != AMDGPU::VCC)
          continue;
      }

      // Check for the bool flag output for instructions like V_ADD_I32_e64.
      const MachineOperand *SDst = TII->getNamedOperand(MI,
                                                        AMDGPU::OpName::sdst);

      // Check the carry-in operand for v_addc_u32_e64.
      const MachineOperand *Src2 = TII->getNamedOperand(MI,
                                                        AMDGPU::OpName::src2);

      if (SDst) {
        if (SDst->getReg() != AMDGPU::VCC) {
          if (TargetRegisterInfo::isVirtualRegister(SDst->getReg()))
            MRI.setRegAllocationHint(SDst->getReg(), 0, AMDGPU::VCC);
          continue;
        }

        // All of the instructions with carry outs also have an SGPR input in
        // src2.
        if (Src2 && Src2->getReg() != AMDGPU::VCC) {
          if (TargetRegisterInfo::isVirtualRegister(Src2->getReg()))
            MRI.setRegAllocationHint(Src2->getReg(), 0, AMDGPU::VCC);

          continue;
        }
      }

      // We can shrink this instruction
      LLVM_DEBUG(dbgs() << "Shrinking " << MI);

      MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
      ++NumInstructionsShrunk;

      // Copy extra operands not present in the instruction definition.
      copyExtraImplicitOps(*Inst32, MF, MI);

      MI.eraseFromParent();
      // The shrunk form may now accept a folded literal; try once more.
      foldImmediates(*Inst32, TII, MRI);

      LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
    }
  }
  return false;
}