1 //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
// The pass combines a V_MOV_B32_dpp instruction with its VALU uses as a DPP
// src0 operand. If any of the use instructions cannot be combined with the
// mov, the whole sequence is reverted.
11 //
12 // $old = ...
13 // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
14 //                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
15 // $res = VALU $dpp_value [, src1]
16 //
17 // to
18 //
19 // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
20 //                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
21 //
22 // Combining rules :
23 //
24 // if $row_mask and $bank_mask are fully enabled (0xF) and
25 //    $bound_ctrl==DPP_BOUND_ZERO or $old==0
26 // -> $combined_old = undef,
27 //    $combined_bound_ctrl = DPP_BOUND_ZERO
28 //
29 // if the VALU op is binary and
30 //    $bound_ctrl==DPP_BOUND_OFF and
31 //    $old==identity value (immediate) for the VALU op
32 // -> $combined_old = src1,
33 //    $combined_bound_ctrl = DPP_BOUND_OFF
34 //
35 // Otherwise cancel.
36 //
37 // The mov_dpp instruction should reside in the same BB as all its uses
38 //===----------------------------------------------------------------------===//
39 
40 #include "AMDGPU.h"
41 #include "AMDGPUSubtarget.h"
42 #include "SIInstrInfo.h"
43 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
44 #include "llvm/ADT/DenseMap.h"
45 #include "llvm/ADT/SmallVector.h"
46 #include "llvm/ADT/Statistic.h"
47 #include "llvm/CodeGen/MachineBasicBlock.h"
48 #include "llvm/CodeGen/MachineFunction.h"
49 #include "llvm/CodeGen/MachineFunctionPass.h"
50 #include "llvm/CodeGen/MachineInstr.h"
51 #include "llvm/CodeGen/MachineInstrBuilder.h"
52 #include "llvm/CodeGen/MachineOperand.h"
53 #include "llvm/CodeGen/MachineRegisterInfo.h"
54 #include "llvm/CodeGen/TargetRegisterInfo.h"
55 #include "llvm/Pass.h"
56 #include <cassert>
57 
58 using namespace llvm;
59 
60 #define DEBUG_TYPE "gcn-dpp-combine"
61 
62 STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
63 
64 namespace {
65 
66 class GCNDPPCombine : public MachineFunctionPass {
67   MachineRegisterInfo *MRI;
68   const SIInstrInfo *TII;
69 
70   using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
71 
72   MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;
73 
74   MachineInstr *createDPPInst(MachineInstr &OrigMI,
75                               MachineInstr &MovMI,
76                               RegSubRegPair CombOldVGPR,
77                               MachineOperand *OldOpnd,
78                               bool CombBCZ) const;
79 
80   MachineInstr *createDPPInst(MachineInstr &OrigMI,
81                               MachineInstr &MovMI,
82                               RegSubRegPair CombOldVGPR,
83                               bool CombBCZ) const;
84 
85   bool hasNoImmOrEqual(MachineInstr &MI,
86                        unsigned OpndName,
87                        int64_t Value,
88                        int64_t Mask = -1) const;
89 
90   bool combineDPPMov(MachineInstr &MI) const;
91 
92 public:
93   static char ID;
94 
95   GCNDPPCombine() : MachineFunctionPass(ID) {
96     initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
97   }
98 
99   bool runOnMachineFunction(MachineFunction &MF) override;
100 
101   StringRef getPassName() const override { return "GCN DPP Combine"; }
102 
103   void getAnalysisUsage(AnalysisUsage &AU) const override {
104     AU.setPreservesCFG();
105     MachineFunctionPass::getAnalysisUsage(AU);
106   }
107 
108 private:
109   int getDPPOp(unsigned Op) const;
110 };
111 
112 } // end anonymous namespace
113 
114 INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)
115 
116 char GCNDPPCombine::ID = 0;
117 
118 char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;
119 
120 FunctionPass *llvm::createGCNDPPCombinePass() {
121   return new GCNDPPCombine();
122 }
123 
124 int GCNDPPCombine::getDPPOp(unsigned Op) const {
125   auto DPP32 = AMDGPU::getDPPOp32(Op);
126   if (DPP32 == -1) {
127     auto E32 = AMDGPU::getVOPe32(Op);
128     DPP32 = (E32 == -1)? -1 : AMDGPU::getDPPOp32(E32);
129   }
130   return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
131 }
132 
133 // tracks the register operand definition and returns:
134 //   1. immediate operand used to initialize the register if found
135 //   2. nullptr if the register operand is undef
136 //   3. the operand itself otherwise
137 MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
138   auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
139   if (!Def)
140     return nullptr;
141 
142   switch(Def->getOpcode()) {
143   default: break;
144   case AMDGPU::IMPLICIT_DEF:
145     return nullptr;
146   case AMDGPU::COPY:
147   case AMDGPU::V_MOV_B32_e32: {
148     auto &Op1 = Def->getOperand(1);
149     if (Op1.isImm())
150       return &Op1;
151     break;
152   }
153   }
154   return &OldOpnd;
155 }
156 
157 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
158                                            MachineInstr &MovMI,
159                                            RegSubRegPair CombOldVGPR,
160                                            bool CombBCZ) const {
161   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
162 
163   auto OrigOp = OrigMI.getOpcode();
164   auto DPPOp = getDPPOp(OrigOp);
165   if (DPPOp == -1) {
166     LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
167     return nullptr;
168   }
169 
170   auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
171                          OrigMI.getDebugLoc(), TII->get(DPPOp))
172     .setMIFlags(OrigMI.getFlags());
173 
174   bool Fail = false;
175   do {
176     auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
177     assert(Dst);
178     DPPInst.add(*Dst);
179     int NumOperands = 1;
180 
181     const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
182     if (OldIdx != -1) {
183       assert(OldIdx == NumOperands);
184       assert(isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI));
185       auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
186       DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
187                      CombOldVGPR.SubReg);
188       ++NumOperands;
189     } else {
190       // TODO: this discards MAC/FMA instructions for now, let's add it later
191       LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
192                            " TBD\n");
193       Fail = true;
194       break;
195     }
196 
197     if (auto *Mod0 = TII->getNamedOperand(OrigMI,
198                                           AMDGPU::OpName::src0_modifiers)) {
199       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
200                                           AMDGPU::OpName::src0_modifiers));
201       assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
202       DPPInst.addImm(Mod0->getImm());
203       ++NumOperands;
204     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
205                    AMDGPU::OpName::src0_modifiers) != -1) {
206       DPPInst.addImm(0);
207       ++NumOperands;
208     }
209     auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
210     assert(Src0);
211     if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
212       LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
213       Fail = true;
214       break;
215     }
216     DPPInst.add(*Src0);
217     DPPInst->getOperand(NumOperands).setIsKill(false);
218     ++NumOperands;
219 
220     if (auto *Mod1 = TII->getNamedOperand(OrigMI,
221                                           AMDGPU::OpName::src1_modifiers)) {
222       assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
223                                           AMDGPU::OpName::src1_modifiers));
224       assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
225       DPPInst.addImm(Mod1->getImm());
226       ++NumOperands;
227     } else if (AMDGPU::getNamedOperandIdx(DPPOp,
228                    AMDGPU::OpName::src1_modifiers) != -1) {
229       DPPInst.addImm(0);
230       ++NumOperands;
231     }
232     if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
233       if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
234         LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
235         Fail = true;
236         break;
237       }
238       DPPInst.add(*Src1);
239       ++NumOperands;
240     }
241 
242     if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
243       if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
244           !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
245         LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
246         Fail = true;
247         break;
248       }
249       DPPInst.add(*Src2);
250     }
251 
252     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
253     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
254     DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
255     DPPInst.addImm(CombBCZ ? 1 : 0);
256   } while (false);
257 
258   if (Fail) {
259     DPPInst.getInstr()->eraseFromParent();
260     return nullptr;
261   }
262   LLVM_DEBUG(dbgs() << "  combined:  " << *DPPInst.getInstr());
263   return DPPInst.getInstr();
264 }
265 
266 static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
267   assert(OldOpnd->isImm());
268   switch (OrigMIOp) {
269   default: break;
270   case AMDGPU::V_ADD_U32_e32:
271   case AMDGPU::V_ADD_U32_e64:
272   case AMDGPU::V_ADD_I32_e32:
273   case AMDGPU::V_ADD_I32_e64:
274   case AMDGPU::V_OR_B32_e32:
275   case AMDGPU::V_OR_B32_e64:
276   case AMDGPU::V_SUBREV_U32_e32:
277   case AMDGPU::V_SUBREV_U32_e64:
278   case AMDGPU::V_SUBREV_I32_e32:
279   case AMDGPU::V_SUBREV_I32_e64:
280   case AMDGPU::V_MAX_U32_e32:
281   case AMDGPU::V_MAX_U32_e64:
282   case AMDGPU::V_XOR_B32_e32:
283   case AMDGPU::V_XOR_B32_e64:
284     if (OldOpnd->getImm() == 0)
285       return true;
286     break;
287   case AMDGPU::V_AND_B32_e32:
288   case AMDGPU::V_AND_B32_e64:
289   case AMDGPU::V_MIN_U32_e32:
290   case AMDGPU::V_MIN_U32_e64:
291     if (static_cast<uint32_t>(OldOpnd->getImm()) ==
292         std::numeric_limits<uint32_t>::max())
293       return true;
294     break;
295   case AMDGPU::V_MIN_I32_e32:
296   case AMDGPU::V_MIN_I32_e64:
297     if (static_cast<int32_t>(OldOpnd->getImm()) ==
298         std::numeric_limits<int32_t>::max())
299       return true;
300     break;
301   case AMDGPU::V_MAX_I32_e32:
302   case AMDGPU::V_MAX_I32_e64:
303     if (static_cast<int32_t>(OldOpnd->getImm()) ==
304         std::numeric_limits<int32_t>::min())
305       return true;
306     break;
307   case AMDGPU::V_MUL_I32_I24_e32:
308   case AMDGPU::V_MUL_I32_I24_e64:
309   case AMDGPU::V_MUL_U32_U24_e32:
310   case AMDGPU::V_MUL_U32_U24_e64:
311     if (OldOpnd->getImm() == 1)
312       return true;
313     break;
314   }
315   return false;
316 }
317 
318 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
319                                            MachineInstr &MovMI,
320                                            RegSubRegPair CombOldVGPR,
321                                            MachineOperand *OldOpndValue,
322                                            bool CombBCZ) const {
323   assert(CombOldVGPR.Reg);
324   if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
325     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
326     if (!Src1 || !Src1->isReg()) {
327       LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
328       return nullptr;
329     }
330     if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
331       LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
332       return nullptr;
333     }
334     CombOldVGPR = getRegSubRegPair(*Src1);
335     if (!isOfRegClass(CombOldVGPR, AMDGPU::VGPR_32RegClass, *MRI)) {
336       LLVM_DEBUG(dbgs() << "  failed: src1 isn't a VGPR32 register\n");
337       return nullptr;
338     }
339   }
340   return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
341 }
342 
343 // returns true if MI doesn't have OpndName immediate operand or the
344 // operand has Value
345 bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
346                                     int64_t Value, int64_t Mask) const {
347   auto *Imm = TII->getNamedOperand(MI, OpndName);
348   if (!Imm)
349     return true;
350 
351   assert(Imm->isImm());
352   return (Imm->getImm() & Mask) == Value;
353 }
354 
355 bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
356   assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp);
357   LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);
358 
359   auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
360   assert(DstOpnd && DstOpnd->isReg());
361   auto DPPMovReg = DstOpnd->getReg();
362   if (DPPMovReg.isPhysical()) {
363     LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
364     return false;
365   }
366   if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
367     LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
368                          " for all uses\n");
369     return false;
370   }
371 
372   auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
373   assert(RowMaskOpnd && RowMaskOpnd->isImm());
374   auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
375   assert(BankMaskOpnd && BankMaskOpnd->isImm());
376   const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
377                             BankMaskOpnd->getImm() == 0xF;
378 
379   auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
380   assert(BCZOpnd && BCZOpnd->isImm());
381   bool BoundCtrlZero = BCZOpnd->getImm();
382 
383   auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
384   auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
385   assert(OldOpnd && OldOpnd->isReg());
386   assert(SrcOpnd && SrcOpnd->isReg());
387   if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
388     LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
389     return false;
390   }
391 
392   auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
393   // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
394   // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
395   // but the third option is used to distinguish undef from non-immediate
396   // to reuse IMPLICIT_DEF instruction later
397   assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);
398 
399   bool CombBCZ = false;
400 
401   if (MaskAllLanes && BoundCtrlZero) { // [1]
402     CombBCZ = true;
403   } else {
404     if (!OldOpndValue || !OldOpndValue->isImm()) {
405       LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
406       return false;
407     }
408 
409     if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
410       LLVM_DEBUG(dbgs() <<
411         "  failed: old reg def and mov should be in the same BB\n");
412       return false;
413     }
414 
415     if (OldOpndValue->getImm() == 0) {
416       if (MaskAllLanes) {
417         assert(!BoundCtrlZero); // by check [1]
418         CombBCZ = true;
419       }
420     } else if (BoundCtrlZero) {
421       assert(!MaskAllLanes); // by check [1]
422       LLVM_DEBUG(dbgs() <<
423         "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
424       return false;
425     }
426   }
427 
428   LLVM_DEBUG(dbgs() << "  old=";
429     if (!OldOpndValue)
430       dbgs() << "undef";
431     else
432       dbgs() << *OldOpndValue;
433     dbgs() << ", bound_ctrl=" << CombBCZ << '\n');
434 
435   SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
436   DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
437   auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
438   // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
439   if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
440     CombOldVGPR = RegSubRegPair(
441       MRI->createVirtualRegister(&AMDGPU::VGPR_32RegClass));
442     auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
443                              TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
444     DPPMIs.push_back(UndefInst.getInstr());
445   }
446 
447   OrigMIs.push_back(&MovMI);
448   bool Rollback = true;
449   SmallVector<MachineOperand*, 16> Uses;
450 
451   for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
452     Uses.push_back(&Use);
453   }
454 
455   while (!Uses.empty()) {
456     MachineOperand *Use = Uses.pop_back_val();
457     Rollback = true;
458 
459     auto &OrigMI = *Use->getParent();
460     LLVM_DEBUG(dbgs() << "  try: " << OrigMI);
461 
462     auto OrigOp = OrigMI.getOpcode();
463     if (OrigOp == AMDGPU::REG_SEQUENCE) {
464       Register FwdReg = OrigMI.getOperand(0).getReg();
465       unsigned FwdSubReg = 0;
466 
467       if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
468         LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
469                              " for all uses\n");
470         break;
471       }
472 
473       unsigned OpNo, E = OrigMI.getNumOperands();
474       for (OpNo = 1; OpNo < E; OpNo += 2) {
475         if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
476           FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
477           break;
478         }
479       }
480 
481       if (!FwdSubReg)
482         break;
483 
484       for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
485         if (Op.getSubReg() == FwdSubReg)
486           Uses.push_back(&Op);
487       }
488       RegSeqWithOpNos[&OrigMI].push_back(OpNo);
489       continue;
490     }
491 
492     if (TII->isVOP3(OrigOp)) {
493       if (!TII->hasVALU32BitEncoding(OrigOp)) {
494         LLVM_DEBUG(dbgs() << "  failed: VOP3 hasn't e32 equivalent\n");
495         break;
496       }
497       // check if other than abs|neg modifiers are set (opsel for example)
498       const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
499       if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
500           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
501           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
502           !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
503         LLVM_DEBUG(dbgs() << "  failed: VOP3 has non-default modifiers\n");
504         break;
505       }
506     } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
507       LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3\n");
508       break;
509     }
510 
511     LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
512     if (Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0)) {
513       if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
514                                         OldOpndValue, CombBCZ)) {
515         DPPMIs.push_back(DPPInst);
516         Rollback = false;
517       }
518     } else if (OrigMI.isCommutable() &&
519                Use == TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
520       auto *BB = OrigMI.getParent();
521       auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
522       BB->insert(OrigMI, NewMI);
523       if (TII->commuteInstruction(*NewMI)) {
524         LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);
525         if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
526                                           OldOpndValue, CombBCZ)) {
527           DPPMIs.push_back(DPPInst);
528           Rollback = false;
529         }
530       } else
531         LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
532       NewMI->eraseFromParent();
533     } else
534       LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
535     if (Rollback)
536       break;
537     OrigMIs.push_back(&OrigMI);
538   }
539 
540   Rollback |= !Uses.empty();
541 
542   for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
543     MI->eraseFromParent();
544 
545   if (!Rollback) {
546     for (auto &S : RegSeqWithOpNos) {
547       if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
548         S.first->eraseFromParent();
549         continue;
550       }
551       while (!S.second.empty())
552         S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
553     }
554   }
555 
556   return !Rollback;
557 }
558 
559 bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
560   auto &ST = MF.getSubtarget<GCNSubtarget>();
561   if (!ST.hasDPP() || skipFunction(MF.getFunction()))
562     return false;
563 
564   MRI = &MF.getRegInfo();
565   TII = ST.getInstrInfo();
566 
567   assert(MRI->isSSA() && "Must be run on SSA");
568 
569   bool Changed = false;
570   for (auto &MBB : MF) {
571     for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
572       auto &MI = *I++;
573       if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
574         Changed = true;
575         ++NumDPPMovsCombined;
576       } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
577         auto Split = TII->expandMovDPP64(MI);
578         for (auto M : { Split.first, Split.second }) {
579           if (combineDPPMov(*M))
580             ++NumDPPMovsCombined;
581         }
582         Changed = true;
583       }
584     }
585   }
586   return Changed;
587 }
588