1 //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
// operand. If any of the use instructions cannot be combined with the mov, the
// whole sequence is reverted.
11 //
12 // $old = ...
13 // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
14 //                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
15 // $res = VALU $dpp_value [, src1]
16 //
17 // to
18 //
19 // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
20 //                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
21 //
22 // Combining rules :
23 //
24 // if $row_mask and $bank_mask are fully enabled (0xF) and
25 //    $bound_ctrl==DPP_BOUND_ZERO or $old==0
26 // -> $combined_old = undef,
27 //    $combined_bound_ctrl = DPP_BOUND_ZERO
28 //
29 // if the VALU op is binary and
30 //    $bound_ctrl==DPP_BOUND_OFF and
31 //    $old==identity value (immediate) for the VALU op
32 // -> $combined_old = src1,
33 //    $combined_bound_ctrl = DPP_BOUND_OFF
34 //
35 // Otherwise cancel.
36 //
37 // The mov_dpp instruction should reside in the same BB as all its uses
38 //===----------------------------------------------------------------------===//
39 
40 #include "AMDGPU.h"
41 #include "AMDGPUSubtarget.h"
42 #include "llvm/ADT/Statistic.h"
43 #include "llvm/CodeGen/MachineFunctionPass.h"
44 
45 using namespace llvm;
46 
47 #define DEBUG_TYPE "gcn-dpp-combine"
48 
49 STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
50 
51 namespace {
52 
class GCNDPPCombine : public MachineFunctionPass {
  MachineRegisterInfo *MRI;
  const SIInstrInfo *TII;

  using RegSubRegPair = TargetInstrInfo::RegSubRegPair;

  // Resolves the value of a dpp mov's 'old' operand: the immediate operand
  // that initializes the register if there is one, nullptr if the register
  // is undef (IMPLICIT_DEF), or the operand itself otherwise.
  MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;

  // Creates the DPP variant of OrigMI, first applying the identity-value
  // combining rule for OldOpnd (see the file header). Returns nullptr if the
  // combine is not possible.
  MachineInstr *createDPPInst(MachineInstr &OrigMI,
                              MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR,
                              MachineOperand *OldOpnd,
                              bool CombBCZ) const;

  // Creates the DPP variant of OrigMI with dpp controls taken from MovMI and
  // the given combined old operand / bound_ctrl. Returns nullptr on failure.
  MachineInstr *createDPPInst(MachineInstr &OrigMI,
                              MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR,
                              bool CombBCZ) const;

  // Returns true if MI has no operand named OpndName, or if that immediate
  // operand, masked with Mask, equals Value.
  bool hasNoImmOrEqual(MachineInstr &MI,
                       unsigned OpndName,
                       int64_t Value,
                       int64_t Mask = -1) const;

  // Tries to combine the given V_MOV_B32_dpp into all of its VALU uses;
  // returns true on success (all-or-nothing per mov).
  bool combineDPPMov(MachineInstr &MI) const;

public:
  static char ID;

  GCNDPPCombine() : MachineFunctionPass(ID) {
    initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "GCN DPP Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  // The pass relies on SSA def/use chains from MachineRegisterInfo.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties()
      .set(MachineFunctionProperties::Property::IsSSA);
  }

private:
  // Maps a VALU opcode to its 32-bit DPP-capable equivalent, or -1 if none
  // exists (or it is unsupported by the current subtarget).
  int getDPPOp(unsigned Op) const;
};
103 
104 } // end anonymous namespace
105 
INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)

char GCNDPPCombine::ID = 0;

// Pass ID exposed for use by the AMDGPU target pass pipeline.
char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;

// Factory used by the target to add this pass to the codegen pipeline.
FunctionPass *llvm::createGCNDPPCombinePass() {
  return new GCNDPPCombine();
}
115 
116 int GCNDPPCombine::getDPPOp(unsigned Op) const {
117   auto DPP32 = AMDGPU::getDPPOp32(Op);
118   if (DPP32 == -1) {
119     auto E32 = AMDGPU::getVOPe32(Op);
120     DPP32 = (E32 == -1)? -1 : AMDGPU::getDPPOp32(E32);
121   }
122   return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
123 }
124 
125 // tracks the register operand definition and returns:
126 //   1. immediate operand used to initialize the register if found
127 //   2. nullptr if the register operand is undef
128 //   3. the operand itself otherwise
129 MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
130   auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
131   if (!Def)
132     return nullptr;
133 
134   switch(Def->getOpcode()) {
135   default: break;
136   case AMDGPU::IMPLICIT_DEF:
137     return nullptr;
138   case AMDGPU::COPY:
139   case AMDGPU::V_MOV_B32_e32: {
140     auto &Op1 = Def->getOperand(1);
141     if (Op1.isImm())
142       return &Op1;
143     break;
144   }
145   }
146   return &OldOpnd;
147 }
148 
// Builds the DPP variant of OrigMI in place right before it: copies OrigMI's
// vdst/src operands, inserts the combined old operand, and appends MovMI's
// dpp controls with CombBCZ as the combined bound_ctrl. Returns the new
// instruction, or nullptr (erasing the partially built instruction) when any
// required operand is missing or illegal in the DPP encoding.
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           MachineInstr &MovMI,
                                           RegSubRegPair CombOldVGPR,
                                           bool CombBCZ) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);

  auto OrigOp = OrigMI.getOpcode();
  auto DPPOp = getDPPOp(OrigOp);
  if (DPPOp == -1) {
    LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
    return nullptr;
  }

  auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
                         OrigMI.getDebugLoc(), TII->get(DPPOp))
    .setMIFlags(OrigMI.getFlags());

  bool Fail = false;
  do {
    auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
    assert(Dst);
    DPPInst.add(*Dst);
    // NumOperands tracks the index of the next operand to be added; the
    // asserts below check it stays in sync with the DPP operand layout.
    int NumOperands = 1;

    const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
    if (OldIdx != -1) {
      assert(OldIdx == NumOperands);
      assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
      // If the combined old register has no def it is undef; mark it so.
      auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
      DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
                     CombOldVGPR.SubReg);
      ++NumOperands;
    } else {
      // TODO: this discards MAC/FMA instructions for now, let's add it later
      LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
                           " TBD\n");
      Fail = true;
      break;
    }

    // src0_modifiers: carried over from OrigMI if present there, otherwise
    // default to 0 when the DPP opcode expects them.
    if (auto *Mod0 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src0_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
                                          AMDGPU::OpName::src0_modifiers));
      assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
      DPPInst.addImm(Mod0->getImm());
      ++NumOperands;
    } else if (AMDGPU::getNamedOperandIdx(DPPOp,
                   AMDGPU::OpName::src0_modifiers) != -1) {
      DPPInst.addImm(0);
      ++NumOperands;
    }
    // src0 comes from the mov: the value read from the other lane.
    auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
    assert(Src0);
    if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
      LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
      Fail = true;
      break;
    }
    DPPInst.add(*Src0);
    // The same source register may feed several combined instructions, so it
    // must not be marked killed here.
    DPPInst->getOperand(NumOperands).setIsKill(false);
    ++NumOperands;

    // src1_modifiers: same policy as src0_modifiers above.
    if (auto *Mod1 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src1_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
                                          AMDGPU::OpName::src1_modifiers));
      assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
      DPPInst.addImm(Mod1->getImm());
      ++NumOperands;
    } else if (AMDGPU::getNamedOperandIdx(DPPOp,
                   AMDGPU::OpName::src1_modifiers) != -1) {
      DPPInst.addImm(0);
      ++NumOperands;
    }
    if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
      if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
        LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src1);
      ++NumOperands;
    }

    if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
      // The DPP opcode itself must have a src2 slot for this to work.
      if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
          !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
        LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src2);
    }

    // Finally append the dpp controls from the mov and the combined
    // bound_ctrl decided by the caller.
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
    DPPInst.addImm(CombBCZ ? 1 : 0);
  } while (false);

  if (Fail) {
    DPPInst.getInstr()->eraseFromParent();
    return nullptr;
  }
  LLVM_DEBUG(dbgs() << "  combined:  " << *DPPInst.getInstr());
  return DPPInst.getInstr();
}
257 
258 static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
259   assert(OldOpnd->isImm());
260   switch (OrigMIOp) {
261   default: break;
262   case AMDGPU::V_ADD_U32_e32:
263   case AMDGPU::V_ADD_U32_e64:
264   case AMDGPU::V_ADD_CO_U32_e32:
265   case AMDGPU::V_ADD_CO_U32_e64:
266   case AMDGPU::V_OR_B32_e32:
267   case AMDGPU::V_OR_B32_e64:
268   case AMDGPU::V_SUBREV_U32_e32:
269   case AMDGPU::V_SUBREV_U32_e64:
270   case AMDGPU::V_SUBREV_CO_U32_e32:
271   case AMDGPU::V_SUBREV_CO_U32_e64:
272   case AMDGPU::V_MAX_U32_e32:
273   case AMDGPU::V_MAX_U32_e64:
274   case AMDGPU::V_XOR_B32_e32:
275   case AMDGPU::V_XOR_B32_e64:
276     if (OldOpnd->getImm() == 0)
277       return true;
278     break;
279   case AMDGPU::V_AND_B32_e32:
280   case AMDGPU::V_AND_B32_e64:
281   case AMDGPU::V_MIN_U32_e32:
282   case AMDGPU::V_MIN_U32_e64:
283     if (static_cast<uint32_t>(OldOpnd->getImm()) ==
284         std::numeric_limits<uint32_t>::max())
285       return true;
286     break;
287   case AMDGPU::V_MIN_I32_e32:
288   case AMDGPU::V_MIN_I32_e64:
289     if (static_cast<int32_t>(OldOpnd->getImm()) ==
290         std::numeric_limits<int32_t>::max())
291       return true;
292     break;
293   case AMDGPU::V_MAX_I32_e32:
294   case AMDGPU::V_MAX_I32_e64:
295     if (static_cast<int32_t>(OldOpnd->getImm()) ==
296         std::numeric_limits<int32_t>::min())
297       return true;
298     break;
299   case AMDGPU::V_MUL_I32_I24_e32:
300   case AMDGPU::V_MUL_I32_I24_e64:
301   case AMDGPU::V_MUL_U32_U24_e32:
302   case AMDGPU::V_MUL_U32_U24_e64:
303     if (OldOpnd->getImm() == 1)
304       return true;
305     break;
306   }
307   return false;
308 }
309 
310 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
311                                            MachineInstr &MovMI,
312                                            RegSubRegPair CombOldVGPR,
313                                            MachineOperand *OldOpndValue,
314                                            bool CombBCZ) const {
315   assert(CombOldVGPR.Reg);
316   if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
317     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
318     if (!Src1 || !Src1->isReg()) {
319       LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
320       return nullptr;
321     }
322     if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
323       LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
324       return nullptr;
325     }
326     CombOldVGPR = getRegSubRegPair(*Src1);
327     if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
328       LLVM_DEBUG(dbgs() << "  failed: src1 isn't a VGPR32 register\n");
329       return nullptr;
330     }
331   }
332   return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
333 }
334 
335 // returns true if MI doesn't have OpndName immediate operand or the
336 // operand has Value
337 bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
338                                     int64_t Value, int64_t Mask) const {
339   auto *Imm = TII->getNamedOperand(MI, OpndName);
340   if (!Imm)
341     return true;
342 
343   assert(Imm->isImm());
344   return (Imm->getImm() & Mask) == Value;
345 }
346 
// Attempts to fold MovMI (a V_MOV_B32_dpp) into every VALU instruction that
// reads its result, rewriting each use to the corresponding *_dpp opcode.
// The transformation is all-or-nothing: if any use cannot be combined, all
// newly created DPP instructions are erased and false is returned.
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);

  auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
  assert(DstOpnd && DstOpnd->isReg());
  auto DPPMovReg = DstOpnd->getReg();
  if (DPPMovReg.isPhysical()) {
    LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
    return false;
  }
  // The mov's result depends on EXEC, so folding it into a use is only valid
  // if EXEC cannot change between the mov and any of its uses.
  if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
    LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
                         " for all uses\n");
    return false;
  }

  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
  assert(RowMaskOpnd && RowMaskOpnd->isImm());
  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
  assert(BankMaskOpnd && BankMaskOpnd->isImm());
  // Both masks fully enabled (0xF) means no lane keeps its old value because
  // of the masks alone.
  const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
                            BankMaskOpnd->getImm() == 0xF;

  auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
  assert(BCZOpnd && BCZOpnd->isImm());
  bool BoundCtrlZero = BCZOpnd->getImm();

  auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
  auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
  assert(OldOpnd && OldOpnd->isReg());
  assert(SrcOpnd && SrcOpnd->isReg());
  if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
    LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
    return false;
  }

  auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
  // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
  // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
  // but the third option is used to distinguish undef from non-immediate
  // to reuse IMPLICIT_DEF instruction later
  assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);

  bool CombBCZ = false;

  // Decide which combining rule from the file header applies.
  if (MaskAllLanes && BoundCtrlZero) { // [1]
    CombBCZ = true;
  } else {
    if (!OldOpndValue || !OldOpndValue->isImm()) {
      LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
      return false;
    }

    if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
      LLVM_DEBUG(dbgs() <<
        "  failed: old reg def and mov should be in the same BB\n");
      return false;
    }

    if (OldOpndValue->getImm() == 0) {
      if (MaskAllLanes) {
        assert(!BoundCtrlZero); // by check [1]
        // old==0 with all lanes enabled is equivalent to bound_ctrl:0.
        CombBCZ = true;
      }
    } else if (BoundCtrlZero) {
      assert(!MaskAllLanes); // by check [1]
      LLVM_DEBUG(dbgs() <<
        "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "  old=";
    if (!OldOpndValue)
      dbgs() << "undef";
    else
      dbgs() << *OldOpndValue;
    dbgs() << ", bound_ctrl=" << CombBCZ << '\n');

  // OrigMIs: instructions to erase on success; DPPMIs: ones to erase on
  // rollback.
  SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
  // REG_SEQUENCE uses forwarding the DPP value, with the operand numbers to
  // be marked undef after a successful combine.
  DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
  auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
  // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
  if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
    CombOldVGPR = RegSubRegPair(
      MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
    auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
                             TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
    DPPMIs.push_back(UndefInst.getInstr());
  }

  OrigMIs.push_back(&MovMI);
  bool Rollback = true;
  SmallVector<MachineOperand*, 16> Uses;

  for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
    Uses.push_back(&Use);
  }

  // Worklist over all (transitive) uses; REG_SEQUENCE uses push the uses of
  // their result onto the worklist.
  while (!Uses.empty()) {
    MachineOperand *Use = Uses.pop_back_val();
    Rollback = true;

    auto &OrigMI = *Use->getParent();
    LLVM_DEBUG(dbgs() << "  try: " << OrigMI);

    auto OrigOp = OrigMI.getOpcode();
    if (OrigOp == AMDGPU::REG_SEQUENCE) {
      Register FwdReg = OrigMI.getOperand(0).getReg();
      unsigned FwdSubReg = 0;

      if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
        LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
                             " for all uses\n");
        break;
      }

      // Find which subregister of the REG_SEQUENCE result carries the DPP
      // value (operands come in reg/subreg-index pairs).
      unsigned OpNo, E = OrigMI.getNumOperands();
      for (OpNo = 1; OpNo < E; OpNo += 2) {
        if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
          FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
          break;
        }
      }

      if (!FwdSubReg)
        break;

      // Only uses reading exactly that subreg see the DPP value.
      for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
        if (Op.getSubReg() == FwdSubReg)
          Uses.push_back(&Op);
      }
      RegSeqWithOpNos[&OrigMI].push_back(OpNo);
      continue;
    }

    if (TII->isVOP3(OrigOp)) {
      if (!TII->hasVALU32BitEncoding(OrigOp)) {
        LLVM_DEBUG(dbgs() << "  failed: VOP3 hasn't e32 equivalent\n");
        break;
      }
      // check if other than abs|neg modifiers are set (opsel for example)
      const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
      if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
        LLVM_DEBUG(dbgs() << "  failed: VOP3 has non-default modifiers\n");
        break;
      }
    } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
      LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3\n");
      break;
    }

    // The DPP value must feed src0; a src1 use is acceptable only when the
    // instruction can be commuted to move it into src0.
    auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
    if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [1]
      LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
      break;
    }

    assert(Src0 && "Src1 without Src0?");
    if (Src1 && Src1->isIdenticalTo(*Src0)) {
      assert(Src1->isReg());
      LLVM_DEBUG(
          dbgs()
          << "  " << OrigMI
          << "  failed: DPP register is used more than once per instruction\n");
      break;
    }

    LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
    if (Use == Src0) {
      if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
                                        OldOpndValue, CombBCZ)) {
        DPPMIs.push_back(DPPInst);
        Rollback = false;
      }
    } else {
      assert(Use == Src1 && OrigMI.isCommutable()); // by check [1]
      // Commute a clone so the original stays intact if the combine of the
      // commuted form fails; the clone is erased either way.
      auto *BB = OrigMI.getParent();
      auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
      BB->insert(OrigMI, NewMI);
      if (TII->commuteInstruction(*NewMI)) {
        LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);
        if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
                                          OldOpndValue, CombBCZ)) {
          DPPMIs.push_back(DPPInst);
          Rollback = false;
        }
      } else
        LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
      NewMI->eraseFromParent();
    }
    if (Rollback)
      break;
    OrigMIs.push_back(&OrigMI);
  }

  // Any remaining worklist entries mean some use was not combined.
  Rollback |= !Uses.empty();

  // All-or-nothing: erase either the new DPP instructions (failure) or the
  // now-dead originals including the mov itself (success).
  for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
    MI->eraseFromParent();

  if (!Rollback) {
    // Clean up REG_SEQUENCEs: erase dead ones, otherwise mark the operands
    // that referenced the (now erased) mov as undef.
    for (auto &S : RegSeqWithOpNos) {
      if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
        S.first->eraseFromParent();
        continue;
      }
      while (!S.second.empty())
        S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
    }
  }

  return !Rollback;
}
566 
// Pass entry point: scans each block for DPP movs and tries to combine them
// into their uses. 64-bit DPP pseudo movs are first expanded into two 32-bit
// movs that are then combined independently.
bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
  auto &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.hasDPP() || skipFunction(MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  TII = ST.getInstrInfo();

  bool Changed = false;
  for (auto &MBB : MF) {
    // Iterate in reverse: a successful combine erases the mov's uses, which
    // follow it and thus have already been visited. The iterator is advanced
    // before MI may be erased below.
    for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
      auto &MI = *I++;
      if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
        Changed = true;
        ++NumDPPMovsCombined;
      } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
        // Expanding the pseudo always changes the function, even if neither
        // half ends up combined.
        auto Split = TII->expandMovDPP64(MI);
        for (auto M : { Split.first, Split.second }) {
          if (combineDPPMov(*M))
            ++NumDPPMovsCombined;
        }
        Changed = true;
      }
    }
  }
  return Changed;
}
594