1 //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
// operand. If any of the use instructions cannot be combined with the mov, the
10 // whole sequence is reverted.
11 //
12 // $old = ...
13 // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
14 //                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
15 // $res = VALU $dpp_value [, src1]
16 //
17 // to
18 //
19 // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
20 //                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
21 //
22 // Combining rules :
23 //
24 // if $row_mask and $bank_mask are fully enabled (0xF) and
25 //    $bound_ctrl==DPP_BOUND_ZERO or $old==0
26 // -> $combined_old = undef,
27 //    $combined_bound_ctrl = DPP_BOUND_ZERO
28 //
29 // if the VALU op is binary and
30 //    $bound_ctrl==DPP_BOUND_OFF and
31 //    $old==identity value (immediate) for the VALU op
32 // -> $combined_old = src1,
33 //    $combined_bound_ctrl = DPP_BOUND_OFF
34 //
35 // Otherwise cancel.
36 //
37 // The mov_dpp instruction should reside in the same BB as all its uses
38 //===----------------------------------------------------------------------===//
39 
#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/Pass.h"
#include <cassert>
#include <limits>
57 
58 using namespace llvm;
59 
60 #define DEBUG_TYPE "gcn-dpp-combine"
61 
62 STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
63 
64 namespace {
65 
66 class GCNDPPCombine : public MachineFunctionPass {
67   MachineRegisterInfo *MRI;
68   const SIInstrInfo *TII;
69 
70   using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
71 
72   MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
73 
74   MachineInstr *createDPPInst(MachineInstr &OrigMI,
75                               MachineInstr &MovMI,
76                               RegSubRegPair CombOldVGPR,
77                               MachineOperand *OldOpnd,
78                               bool CombBCZ) const;
79 
80   MachineInstr *createDPPInst(MachineInstr &OrigMI,
81                               MachineInstr &MovMI,
82                               RegSubRegPair CombOldVGPR,
83                               bool CombBCZ) const;
84 
85   bool hasNoImmOrEqual(MachineInstr &MI,
86                        unsigned OpndName,
87                        int64_t Value,
88                        int64_t Mask = -1) const;
89 
90   bool combineDPPMov(MachineInstr &MI) const;
91 
92 public:
93   static char ID;
94 
95   GCNDPPCombine() : MachineFunctionPass(ID) {
96     initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
97   }
98 
99   bool runOnMachineFunction(MachineFunction &MF) override;
100 
101   StringRef getPassName() const override { return "GCN DPP Combine"; }
102 
103   void getAnalysisUsage(AnalysisUsage &AU) const override {
104     AU.setPreservesCFG();
105     MachineFunctionPass::getAnalysisUsage(AU);
106   }
107 };
108 
109 } // end anonymous namespace
110 
111 INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
112 
113 char GCNDPPCombine::ID = 0;
114 
115 char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
116 
117 FunctionPass *llvm::createGCNDPPCombinePass() {
118   return new GCNDPPCombine();
119 }
120 
121 static int getDPPOp(unsigned Op) {
122   auto DPP32 = AMDGPU::getDPPOp32(Op);
123   if (DPP32 != -1)
124     return DPP32;
125 
126   auto E32 = AMDGPU::getVOPe32(Op);
127   return E32 != -1 ? AMDGPU::getDPPOp32(E32) : -1;
128 }
129 
130 // tracks the register operand definition and returns:
131 //   1. immediate operand used to initialize the register if found
132 //   2. nullptr if the register operand is undef
133 //   3. the operand itself otherwise
134 MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
135   auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
136   if (!Def)
137     return nullptr;
138 
139   switch(Def->getOpcode()) {
140   default: break;
141   case AMDGPU::IMPLICIT_DEF:
142     return nullptr;
143   case AMDGPU::COPY:
144   case AMDGPU::V_MOV_B32_e32: {
145     auto &Op1 = Def->getOperand(1);
146     if (Op1.isImm())
147       return &Op1;
148     break;
149   }
150   }
151   return &OldOpnd;
152 }
153 
154 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
155                                            MachineInstr &MovMI,
156                                            RegSubRegPair CombOldVGPR,
157                                            bool CombBCZ) const {
158   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
159 
160   auto OrigOp = OrigMI.getOpcode();
161   auto DPPOp = getDPPOp(OrigOp);
162   if (DPPOp == -1) {
163     LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
164     return nullptr;
165   }
166 
167   auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
168                          OrigMI.getDebugLoc(), TII->get(DPPOp));
169   bool Fail = false;
170   do {
171     auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
172     assert(Dst);
173     DPPInst.add(*Dst);
174     int NumOperands = 1;
175 
176     const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
177     if (OldIdx != -1) {
178       assert(OldIdx == NumOperands);
179       assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
180       auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
181       DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
182                      CombOldVGPR.SubReg);
183       ++NumOperands;
184     } else {
185       // TODO: this discards MAC/FMA instructions for now, let's add it later
186       LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
187                            " TBD\n");
188       Fail = true;
189       break;
190     }
191 
192     if (auto *Mod0 = TII->getNamedOperand(OrigMI,
193                                           AMDGPU::OpName::src0_modifiers)) {
194       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
195                                           AMDGPU::OpName::src0_modifiers));
196       assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
197       DPPInst.addImm(Mod0->getImm());
198       ++NumOperands;
199     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
200                    AMDGPU::OpName::src0_modifiers) != -1) {
201       DPPInst.addImm(0);
202       ++NumOperands;
203     }
204     auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
205     assert(Src0);
206     if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
207       LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
208       Fail = true;
209       break;
210     }
211     DPPInst.add(*Src0);
212     DPPInst->getOperand(NumOperands).setIsKill(false);
213     ++NumOperands;
214 
215     if (auto *Mod1 = TII->getNamedOperand(OrigMI,
216                                           AMDGPU::OpName::src1_modifiers)) {
217       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
218                                           AMDGPU::OpName::src1_modifiers));
219       assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
220       DPPInst.addImm(Mod1->getImm());
221       ++NumOperands;
222     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
223                    AMDGPU::OpName::src1_modifiers) != -1) {
224       DPPInst.addImm(0);
225       ++NumOperands;
226     }
227     if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
228       if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
229         LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
230         Fail = true;
231         break;
232       }
233       DPPInst.add(*Src1);
234       ++NumOperands;
235     }
236 
237     if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
238       if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
239           !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
240         LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
241         Fail = true;
242         break;
243       }
244       DPPInst.add(*Src2);
245     }
246 
247     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
248     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
249     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
250     DPPInst.addImm(CombBCZ ? 1 : 0);
251   } while (false);
252 
253   if (Fail) {
254     DPPInst.getInstr()->eraseFromParent();
255     return nullptr;
256   }
257   LLVM_DEBUG(dbgs() << "  combined:  " << *DPPInst.getInstr());
258   return DPPInst.getInstr();
259 }
260 
261 static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
262   assert(OldOpnd->isImm());
263   switch (OrigMIOp) {
264   default: break;
265   case AMDGPU::V_ADD_U32_e32:
266   case AMDGPU::V_ADD_U32_e64:
267   case AMDGPU::V_ADD_I32_e32:
268   case AMDGPU::V_ADD_I32_e64:
269   case AMDGPU::V_OR_B32_e32:
270   case AMDGPU::V_OR_B32_e64:
271   case AMDGPU::V_SUBREV_U32_e32:
272   case AMDGPU::V_SUBREV_U32_e64:
273   case AMDGPU::V_SUBREV_I32_e32:
274   case AMDGPU::V_SUBREV_I32_e64:
275   case AMDGPU::V_MAX_U32_e32:
276   case AMDGPU::V_MAX_U32_e64:
277   case AMDGPU::V_XOR_B32_e32:
278   case AMDGPU::V_XOR_B32_e64:
279     if (OldOpnd->getImm() == 0)
280       return true;
281     break;
282   case AMDGPU::V_AND_B32_e32:
283   case AMDGPU::V_AND_B32_e64:
284   case AMDGPU::V_MIN_U32_e32:
285   case AMDGPU::V_MIN_U32_e64:
286     if (static_cast<uint32_t>(OldOpnd->getImm()) ==
287         std::numeric_limits<uint32_t>::max())
288       return true;
289     break;
290   case AMDGPU::V_MIN_I32_e32:
291   case AMDGPU::V_MIN_I32_e64:
292     if (static_cast<int32_t>(OldOpnd->getImm()) ==
293         std::numeric_limits<int32_t>::max())
294       return true;
295     break;
296   case AMDGPU::V_MAX_I32_e32:
297   case AMDGPU::V_MAX_I32_e64:
298     if (static_cast<int32_t>(OldOpnd->getImm()) ==
299         std::numeric_limits<int32_t>::min())
300       return true;
301     break;
302   case AMDGPU::V_MUL_I32_I24_e32:
303   case AMDGPU::V_MUL_I32_I24_e64:
304   case AMDGPU::V_MUL_U32_U24_e32:
305   case AMDGPU::V_MUL_U32_U24_e64:
306     if (OldOpnd->getImm() == 1)
307       return true;
308     break;
309   }
310   return false;
311 }
312 
313 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
314                                            MachineInstr &MovMI,
315                                            RegSubRegPair CombOldVGPR,
316                                            MachineOperand *OldOpndValue,
317                                            bool CombBCZ) const {
318   assert(CombOldVGPR.Reg);
319   if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
320     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
321     if (!Src1 || !Src1->isReg()) {
322       LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
323       return nullptr;
324     }
325     if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
326       LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
327       return nullptr;
328     }
329     CombOldVGPR = getRegSubRegPair(*Src1);
330     if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
331       LLVM_DEBUG(dbgs() << "  failed: src1 isn't a VGPR32 register\n");
332       return nullptr;
333     }
334   }
335   return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
336 }
337 
338 // returns true if MI doesn't have OpndName immediate operand or the
339 // operand has Value
340 bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
341                                     int64_t Value, int64_t Mask) const {
342   auto *Imm = TII->getNamedOperand(MI, OpndName);
343   if (!Imm)
344     return true;
345 
346   assert(Imm->isImm());
347   return (Imm->getImm() & Mask) == Value;
348 }
349 
350 bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
351   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
352   LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
353 
354   auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
355   assert(DstOpnd && DstOpnd->isReg());
356   auto DPPMovReg = DstOpnd->getReg();
357   if (DPPMovReg.isPhysical()) {
358     LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
359     return false;
360   }
361   if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
362     LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
363                          " for all uses\n");
364     return false;
365   }
366 
367   auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
368   assert(RowMaskOpnd && RowMaskOpnd->isImm());
369   auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
370   assert(BankMaskOpnd && BankMaskOpnd->isImm());
371   const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
372                             BankMaskOpnd->getImm() == 0xF;
373 
374   auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
375   assert(BCZOpnd && BCZOpnd->isImm());
376   bool BoundCtrlZero = BCZOpnd->getImm();
377 
378   auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
379   auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
380   assert(OldOpnd && OldOpnd->isReg());
381   assert(SrcOpnd && SrcOpnd->isReg());
382   if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
383     LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
384     return false;
385   }
386 
387   auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
388   // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
389   // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
390   // but the third option is used to distinguish undef from non-immediate
391   // to reuse IMPLICIT_DEF instruction later
392   assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
393 
394   bool CombBCZ = false;
395 
396   if (MaskAllLanes && BoundCtrlZero) { // [1]
397     CombBCZ = true;
398   } else {
399     if (!OldOpndValue || !OldOpndValue->isImm()) {
400       LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
401       return false;
402     }
403 
404     if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
405       LLVM_DEBUG(dbgs() <<
406         "  failed: old reg def and mov should be in the same BB\n");
407       return false;
408     }
409 
410     if (OldOpndValue->getImm() == 0) {
411       if (MaskAllLanes) {
412         assert(!BoundCtrlZero); // by check [1]
413         CombBCZ = true;
414       }
415     } else if (BoundCtrlZero) {
416       assert(!MaskAllLanes); // by check [1]
417       LLVM_DEBUG(dbgs() <<
418         "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
419       return false;
420     }
421   }
422 
423   LLVM_DEBUG(dbgs() << "  old=";
424     if (!OldOpndValue)
425       dbgs() << "undef";
426     else
427       dbgs() << *OldOpndValue;
428     dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
429 
430   SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
431   DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
432   auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
433   // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
434   if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
435     CombOldVGPR = RegSubRegPair(
436       MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
437     auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
438                              TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
439     DPPMIs.push_back(UndefInst.getInstr());
440   }
441 
442   OrigMIs.push_back(&MovMI);
443   bool Rollback = true;
444   SmallVector<MachineOperand*, 16> Uses;
445 
446   for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
447     Uses.push_back(&Use);
448   }
449 
450   while (!Uses.empty()) {
451     MachineOperand *Use = Uses.pop_back_val();
452     Rollback = true;
453 
454     auto &OrigMI = *Use->getParent();
455     LLVM_DEBUG(dbgs() << "  try: " << OrigMI);
456 
457     auto OrigOp = OrigMI.getOpcode();
458     if (OrigOp == AMDGPU::REG_SEQUENCE) {
459       Register FwdReg = OrigMI.getOperand(0).getReg();
460       unsigned FwdSubReg = 0;
461 
462       if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
463         LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
464                              " for all uses\n");
465         break;
466       }
467 
468       unsigned OpNo, E = OrigMI.getNumOperands();
469       for (OpNo = 1; OpNo < E; OpNo += 2) {
470         if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
471           FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
472           break;
473         }
474       }
475 
476       if (!FwdSubReg)
477         break;
478 
479       for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
480         if (Op.getSubReg() == FwdSubReg)
481           Uses.push_back(&Op);
482       }
483       RegSeqWithOpNos[&OrigMI].push_back(OpNo);
484       continue;
485     }
486 
487     if (TII->isVOP3(OrigOp)) {
488       if (!TII->hasVALU32BitEncoding(OrigOp)) {
489         LLVM_DEBUG(dbgs() << "  failed: VOP3 hasn't e32 equivalent\n");
490         break;
491       }
492       // check if other than abs|neg modifiers are set (opsel for example)
493       const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
494       if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
495           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
496           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
497           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
498         LLVM_DEBUG(dbgs() << "  failed: VOP3 has non-default modifiers\n");
499         break;
500       }
501     } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
502       LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3\n");
503       break;
504     }
505 
506     LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
507     if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
508       if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
509                                         OldOpndValue, CombBCZ)) {
510         DPPMIs.push_back(DPPInst);
511         Rollback = false;
512       }
513     } else if (OrigMI.isCommutable() &&
514                Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
515       auto *BB = OrigMI.getParent();
516       auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
517       BB->insert(OrigMI, NewMI);
518       if (TII->commuteInstruction(*NewMI)) {
519         LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);
520         if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
521                                           OldOpndValue, CombBCZ)) {
522           DPPMIs.push_back(DPPInst);
523           Rollback = false;
524         }
525       } else
526         LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
527       NewMI->eraseFromParent();
528     } else
529       LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
530     if (Rollback)
531       break;
532     OrigMIs.push_back(&OrigMI);
533   }
534 
535   Rollback |= !Uses.empty();
536 
537   for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
538     MI->eraseFromParent();
539 
540   if (!Rollback) {
541     for (auto &S : RegSeqWithOpNos) {
542       if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
543         S.first->eraseFromParent();
544         continue;
545       }
546       while (!S.second.empty())
547         S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
548     }
549   }
550 
551   return !Rollback;
552 }
553 
554 bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
555   auto &ST = MF.getSubtarget<GCNSubtarget>();
556   if (!ST.hasDPP() || skipFunction(MF.getFunction()))
557     return false;
558 
559   MRI = &MF.getRegInfo();
560   TII = ST.getInstrInfo();
561 
562   assert(MRI->isSSA() && "Must be run on SSA");
563 
564   bool Changed = false;
565   for (auto &MBB : MF) {
566     for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
567       auto &MI = *I++;
568       if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
569         Changed = true;
570         ++NumDPPMovsCombined;
571       } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
572         auto Split = TII->expandMovDPP64(MI);
573         for (auto M : { Split.first, Split.second }) {
574           if (combineDPPMov(*M))
575             ++NumDPPMovsCombined;
576         }
577         Changed = true;
578       }
579     }
580   }
581   return Changed;
582 }
583