1 //=======- GCNDPPCombine.cpp - optimization for DPP instructions ---==========//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 // The pass combines V_MOV_B32_dpp instruction with its VALU uses as a DPP src0
// operand. If any of the use instructions cannot be combined with the mov, the
10 // whole sequence is reverted.
11 //
12 // $old = ...
13 // $dpp_value = V_MOV_B32_dpp $old, $vgpr_to_be_read_from_other_lane,
14 //                            dpp_controls..., $row_mask, $bank_mask, $bound_ctrl
15 // $res = VALU $dpp_value [, src1]
16 //
17 // to
18 //
19 // $res = VALU_DPP $combined_old, $vgpr_to_be_read_from_other_lane, [src1,]
20 //                 dpp_controls..., $row_mask, $bank_mask, $combined_bound_ctrl
21 //
22 // Combining rules :
23 //
24 // if $row_mask and $bank_mask are fully enabled (0xF) and
25 //    $bound_ctrl==DPP_BOUND_ZERO or $old==0
26 // -> $combined_old = undef,
27 //    $combined_bound_ctrl = DPP_BOUND_ZERO
28 //
29 // if the VALU op is binary and
30 //    $bound_ctrl==DPP_BOUND_OFF and
31 //    $old==identity value (immediate) for the VALU op
32 // -> $combined_old = src1,
33 //    $combined_bound_ctrl = DPP_BOUND_OFF
34 //
35 // Otherwise cancel.
36 //
37 // The mov_dpp instruction should reside in the same BB as all its uses
38 //===----------------------------------------------------------------------===//
39 
40 #include "AMDGPU.h"
41 #include "GCNSubtarget.h"
42 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
43 #include "llvm/ADT/Statistic.h"
44 #include "llvm/CodeGen/MachineFunctionPass.h"
45 
46 using namespace llvm;
47 
48 #define DEBUG_TYPE "gcn-dpp-combine"
49 
50 STATISTIC(NumDPPMovsCombined, "Number of DPP moves combined.");
51 
namespace {

// Machine-function pass that folds V_MOV_B32_dpp / V_MOV_B64_DPP_PSEUDO
// moves into their VALU users, producing the users' DPP-encoded variants.
// See the file header comment for the exact combining rules. Requires SSA
// machine IR (see getRequiredProperties).
class GCNDPPCombine : public MachineFunctionPass {
  MachineRegisterInfo *MRI;   // set per-function in runOnMachineFunction
  const SIInstrInfo *TII;     // set per-function in runOnMachineFunction

  using RegSubRegPair = TargetInstrInfo::RegSubRegPair;

  // Tracks the definition of OldOpnd's register; returns the initializing
  // immediate, nullptr for undef (IMPLICIT_DEF), or the operand itself.
  MachineOperand *getOldOpndValue(MachineOperand &OldOpnd) const;

  // Validating wrapper: checks the identity-value rule for the 'old' operand
  // before delegating to the 4-argument overload below.
  MachineInstr *createDPPInst(MachineInstr &OrigMI,
                              MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR,
                              MachineOperand *OldOpnd,
                              bool CombBCZ) const;

  // Builds the DPP variant of OrigMI using MovMI's DPP controls; returns
  // nullptr (and erases the partial instruction) on failure.
  MachineInstr *createDPPInst(MachineInstr &OrigMI,
                              MachineInstr &MovMI,
                              RegSubRegPair CombOldVGPR,
                              bool CombBCZ) const;

  // True if MI lacks the named immediate operand, or its (masked) value
  // equals Value.
  bool hasNoImmOrEqual(MachineInstr &MI,
                       unsigned OpndName,
                       int64_t Value,
                       int64_t Mask = -1) const;

  // Attempts the whole combine for one DPP mov; rolls back fully on failure.
  bool combineDPPMov(MachineInstr &MI) const;

public:
  static char ID;

  GCNDPPCombine() : MachineFunctionPass(ID) {
    initializeGCNDPPCombinePass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override { return "GCN DPP Combine"; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties()
      .set(MachineFunctionProperties::Property::IsSSA);
  }

private:
  // Maps a VALU opcode to its 32-bit DPP counterpart, or -1 if none exists.
  int getDPPOp(unsigned Op) const;
};

} // end anonymous namespace
106 
// Register the pass with LLVM's pass registry under DEBUG_TYPE
// ("gcn-dpp-combine"); not a CFG-only and not an analysis pass.
INITIALIZE_PASS(GCNDPPCombine, DEBUG_TYPE, "GCN DPP Combine", false, false)

char GCNDPPCombine::ID = 0;

// Externally visible pass ID used by the AMDGPU target pass pipeline.
char &llvm::GCNDPPCombineID = GCNDPPCombine::ID;

// Factory used by the target to instantiate this pass.
FunctionPass *llvm::createGCNDPPCombinePass() {
  return new GCNDPPCombine();
}
116 
117 int GCNDPPCombine::getDPPOp(unsigned Op) const {
118   auto DPP32 = AMDGPU::getDPPOp32(Op);
119   if (DPP32 == -1) {
120     auto E32 = AMDGPU::getVOPe32(Op);
121     DPP32 = (E32 == -1)? -1 : AMDGPU::getDPPOp32(E32);
122   }
123   return (DPP32 == -1 || TII->pseudoToMCOpcode(DPP32) == -1) ? -1 : DPP32;
124 }
125 
126 // tracks the register operand definition and returns:
127 //   1. immediate operand used to initialize the register if found
128 //   2. nullptr if the register operand is undef
129 //   3. the operand itself otherwise
130 MachineOperand *GCNDPPCombine::getOldOpndValue(MachineOperand &OldOpnd) const {
131   auto *Def = getVRegSubRegDef(getRegSubRegPair(OldOpnd), *MRI);
132   if (!Def)
133     return nullptr;
134 
135   switch(Def->getOpcode()) {
136   default: break;
137   case AMDGPU::IMPLICIT_DEF:
138     return nullptr;
139   case AMDGPU::COPY:
140   case AMDGPU::V_MOV_B32_e32:
141   case AMDGPU::V_MOV_B64_PSEUDO: {
142     auto &Op1 = Def->getOperand(1);
143     if (Op1.isImm())
144       return &Op1;
145     break;
146   }
147   }
148   return &OldOpnd;
149 }
150 
// Builds the DPP variant of OrigMI immediately before it, taking the DPP
// controls (dpp_ctrl, row_mask, bank_mask) from MovMI, CombOldVGPR as the
// 'old' operand and CombBCZ as the combined bound_ctrl value. Returns the
// new instruction, or nullptr on failure (any partially built instruction
// is erased). Operands are appended in encoding order; NumOperands tracks
// the next operand index and is cross-checked against the named operand
// indices of the DPP opcode.
MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
                                           MachineInstr &MovMI,
                                           RegSubRegPair CombOldVGPR,
                                           bool CombBCZ) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
         MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);

  auto OrigOp = OrigMI.getOpcode();
  auto DPPOp = getDPPOp(OrigOp);
  if (DPPOp == -1) {
    LLVM_DEBUG(dbgs() << "  failed: no DPP opcode\n");
    return nullptr;
  }

  auto DPPInst = BuildMI(*OrigMI.getParent(), OrigMI,
                         OrigMI.getDebugLoc(), TII->get(DPPOp))
    .setMIFlags(OrigMI.getFlags());

  // do/while(false) gives a single rollback point: 'break' with Fail set
  // erases the partial instruction after the loop.
  bool Fail = false;
  do {
    auto *Dst = TII->getNamedOperand(OrigMI, AMDGPU::OpName::vdst);
    assert(Dst);
    DPPInst.add(*Dst);
    int NumOperands = 1;

    const int OldIdx = AMDGPU::getNamedOperandIdx(DPPOp, AMDGPU::OpName::old);
    if (OldIdx != -1) {
      assert(OldIdx == NumOperands);
      // CombOldVGPR must have the same register class as the mov's result.
      assert(isOfRegClass(
          CombOldVGPR,
          *MRI->getRegClass(
              TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst)->getReg()),
          *MRI));
      // If the old register has no def, mark it undef on the new use.
      auto *Def = getVRegSubRegDef(CombOldVGPR, *MRI);
      DPPInst.addReg(CombOldVGPR.Reg, Def ? 0 : RegState::Undef,
                     CombOldVGPR.SubReg);
      ++NumOperands;
    } else {
      // TODO: this discards MAC/FMA instructions for now, let's add it later
      LLVM_DEBUG(dbgs() << "  failed: no old operand in DPP instruction,"
                           " TBD\n");
      Fail = true;
      break;
    }

    // src0 modifiers: copy from OrigMI if present (only ABS/NEG allowed),
    // otherwise add a zero placeholder when the DPP opcode expects one.
    if (auto *Mod0 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src0_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
                                          AMDGPU::OpName::src0_modifiers));
      assert(0LL == (Mod0->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
      DPPInst.addImm(Mod0->getImm());
      ++NumOperands;
    } else if (AMDGPU::getNamedOperandIdx(DPPOp,
                   AMDGPU::OpName::src0_modifiers) != -1) {
      DPPInst.addImm(0);
      ++NumOperands;
    }
    // src0 of the combined instruction is the mov's src0 (the value read
    // from the other lane).
    auto *Src0 = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
    assert(Src0);
    if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src0)) {
      LLVM_DEBUG(dbgs() << "  failed: src0 is illegal\n");
      Fail = true;
      break;
    }
    DPPInst.add(*Src0);
    // The mov is not necessarily deleted (rollback), so src0 may still have
    // later uses; clear any inherited kill flag.
    DPPInst->getOperand(NumOperands).setIsKill(false);
    ++NumOperands;

    // src1 modifiers: same scheme as src0 modifiers above.
    if (auto *Mod1 = TII->getNamedOperand(OrigMI,
                                          AMDGPU::OpName::src1_modifiers)) {
      assert(NumOperands == AMDGPU::getNamedOperandIdx(DPPOp,
                                          AMDGPU::OpName::src1_modifiers));
      assert(0LL == (Mod1->getImm() & ~(SISrcMods::ABS | SISrcMods::NEG)));
      DPPInst.addImm(Mod1->getImm());
      ++NumOperands;
    } else if (AMDGPU::getNamedOperandIdx(DPPOp,
                   AMDGPU::OpName::src1_modifiers) != -1) {
      DPPInst.addImm(0);
      ++NumOperands;
    }
    if (auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1)) {
      if (!TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src1)) {
        LLVM_DEBUG(dbgs() << "  failed: src1 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src1);
      ++NumOperands;
    }

    if (auto *Src2 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src2)) {
      // The DPP opcode must itself have a src2 slot for this to be legal.
      if (!TII->getNamedOperand(*DPPInst.getInstr(), AMDGPU::OpName::src2) ||
          !TII->isOperandLegal(*DPPInst.getInstr(), NumOperands, Src2)) {
        LLVM_DEBUG(dbgs() << "  failed: src2 is illegal\n");
        Fail = true;
        break;
      }
      DPPInst.add(*Src2);
    }

    // DPP controls come from the mov; bound_ctrl is the combined value.
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask));
    DPPInst.add(*TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask));
    DPPInst.addImm(CombBCZ ? 1 : 0);
  } while (false);

  if (Fail) {
    DPPInst.getInstr()->eraseFromParent();
    return nullptr;
  }
  LLVM_DEBUG(dbgs() << "  combined:  " << *DPPInst.getInstr());
  return DPPInst.getInstr();
}
264 
265 static bool isIdentityValue(unsigned OrigMIOp, MachineOperand *OldOpnd) {
266   assert(OldOpnd->isImm());
267   switch (OrigMIOp) {
268   default: break;
269   case AMDGPU::V_ADD_U32_e32:
270   case AMDGPU::V_ADD_U32_e64:
271   case AMDGPU::V_ADD_CO_U32_e32:
272   case AMDGPU::V_ADD_CO_U32_e64:
273   case AMDGPU::V_OR_B32_e32:
274   case AMDGPU::V_OR_B32_e64:
275   case AMDGPU::V_SUBREV_U32_e32:
276   case AMDGPU::V_SUBREV_U32_e64:
277   case AMDGPU::V_SUBREV_CO_U32_e32:
278   case AMDGPU::V_SUBREV_CO_U32_e64:
279   case AMDGPU::V_MAX_U32_e32:
280   case AMDGPU::V_MAX_U32_e64:
281   case AMDGPU::V_XOR_B32_e32:
282   case AMDGPU::V_XOR_B32_e64:
283     if (OldOpnd->getImm() == 0)
284       return true;
285     break;
286   case AMDGPU::V_AND_B32_e32:
287   case AMDGPU::V_AND_B32_e64:
288   case AMDGPU::V_MIN_U32_e32:
289   case AMDGPU::V_MIN_U32_e64:
290     if (static_cast<uint32_t>(OldOpnd->getImm()) ==
291         std::numeric_limits<uint32_t>::max())
292       return true;
293     break;
294   case AMDGPU::V_MIN_I32_e32:
295   case AMDGPU::V_MIN_I32_e64:
296     if (static_cast<int32_t>(OldOpnd->getImm()) ==
297         std::numeric_limits<int32_t>::max())
298       return true;
299     break;
300   case AMDGPU::V_MAX_I32_e32:
301   case AMDGPU::V_MAX_I32_e64:
302     if (static_cast<int32_t>(OldOpnd->getImm()) ==
303         std::numeric_limits<int32_t>::min())
304       return true;
305     break;
306   case AMDGPU::V_MUL_I32_I24_e32:
307   case AMDGPU::V_MUL_I32_I24_e64:
308   case AMDGPU::V_MUL_U32_U24_e32:
309   case AMDGPU::V_MUL_U32_U24_e64:
310     if (OldOpnd->getImm() == 1)
311       return true;
312     break;
313   }
314   return false;
315 }
316 
317 MachineInstr *GCNDPPCombine::createDPPInst(MachineInstr &OrigMI,
318                                            MachineInstr &MovMI,
319                                            RegSubRegPair CombOldVGPR,
320                                            MachineOperand *OldOpndValue,
321                                            bool CombBCZ) const {
322   assert(CombOldVGPR.Reg);
323   if (!CombBCZ && OldOpndValue && OldOpndValue->isImm()) {
324     auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
325     if (!Src1 || !Src1->isReg()) {
326       LLVM_DEBUG(dbgs() << "  failed: no src1 or it isn't a register\n");
327       return nullptr;
328     }
329     if (!isIdentityValue(OrigMI.getOpcode(), OldOpndValue)) {
330       LLVM_DEBUG(dbgs() << "  failed: old immediate isn't an identity\n");
331       return nullptr;
332     }
333     CombOldVGPR = getRegSubRegPair(*Src1);
334     auto MovDst = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
335     const TargetRegisterClass *RC = MRI->getRegClass(MovDst->getReg());
336     if (!isOfRegClass(CombOldVGPR, *RC, *MRI)) {
337       LLVM_DEBUG(dbgs() << "  failed: src1 has wrong register class\n");
338       return nullptr;
339     }
340   }
341   return createDPPInst(OrigMI, MovMI, CombOldVGPR, CombBCZ);
342 }
343 
344 // returns true if MI doesn't have OpndName immediate operand or the
345 // operand has Value
346 bool GCNDPPCombine::hasNoImmOrEqual(MachineInstr &MI, unsigned OpndName,
347                                     int64_t Value, int64_t Mask) const {
348   auto *Imm = TII->getNamedOperand(MI, OpndName);
349   if (!Imm)
350     return true;
351 
352   assert(Imm->isImm());
353   return (Imm->getImm() & Mask) == Value;
354 }
355 
// Attempts to fold the DPP mov MovMI into every VALU use of its result,
// replacing each use with a DPP-encoded variant. All-or-nothing: on success
// the mov and the original users are erased; on any failure every newly
// created DPP instruction is erased instead and false is returned.
bool GCNDPPCombine::combineDPPMov(MachineInstr &MovMI) const {
  assert(MovMI.getOpcode() == AMDGPU::V_MOV_B32_dpp ||
         MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO);
  LLVM_DEBUG(dbgs() << "\nDPP combine: " << MovMI);

  auto *DstOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::vdst);
  assert(DstOpnd && DstOpnd->isReg());
  auto DPPMovReg = DstOpnd->getReg();
  // A physical def can't be tracked through MRI use lists below.
  if (DPPMovReg.isPhysical()) {
    LLVM_DEBUG(dbgs() << "  failed: dpp move writes physreg\n");
    return false;
  }
  // Moving the DPP read into a use is only valid if EXEC is unchanged
  // between the mov and every use.
  if (execMayBeModifiedBeforeAnyUse(*MRI, DPPMovReg, MovMI)) {
    LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
                         " for all uses\n");
    return false;
  }

  if (MovMI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
    auto *DppCtrl = TII->getNamedOperand(MovMI, AMDGPU::OpName::dpp_ctrl);
    assert(DppCtrl && DppCtrl->isImm());
    if (!AMDGPU::isLegal64BitDPPControl(DppCtrl->getImm())) {
      LLVM_DEBUG(dbgs() << "  failed: 64 bit dpp move uses unsupported"
                           " control value\n");
      // Let it split, then control may become legal.
      return false;
    }
  }

  auto *RowMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::row_mask);
  assert(RowMaskOpnd && RowMaskOpnd->isImm());
  auto *BankMaskOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bank_mask);
  assert(BankMaskOpnd && BankMaskOpnd->isImm());
  const bool MaskAllLanes = RowMaskOpnd->getImm() == 0xF &&
                            BankMaskOpnd->getImm() == 0xF;

  auto *BCZOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::bound_ctrl);
  assert(BCZOpnd && BCZOpnd->isImm());
  bool BoundCtrlZero = BCZOpnd->getImm();

  auto *OldOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::old);
  auto *SrcOpnd = TII->getNamedOperand(MovMI, AMDGPU::OpName::src0);
  assert(OldOpnd && OldOpnd->isReg());
  assert(SrcOpnd && SrcOpnd->isReg());
  if (OldOpnd->getReg().isPhysical() || SrcOpnd->getReg().isPhysical()) {
    LLVM_DEBUG(dbgs() << "  failed: dpp move reads physreg\n");
    return false;
  }

  auto * const OldOpndValue = getOldOpndValue(*OldOpnd);
  // OldOpndValue is either undef (IMPLICIT_DEF) or immediate or something else
  // We could use: assert(!OldOpndValue || OldOpndValue->isImm())
  // but the third option is used to distinguish undef from non-immediate
  // to reuse IMPLICIT_DEF instruction later
  assert(!OldOpndValue || OldOpndValue->isImm() || OldOpndValue == OldOpnd);

  bool CombBCZ = false;

  // Decide the combined bound_ctrl per the rules in the file header.
  if (MaskAllLanes && BoundCtrlZero) { // [1]
    CombBCZ = true;
  } else {
    if (!OldOpndValue || !OldOpndValue->isImm()) {
      LLVM_DEBUG(dbgs() << "  failed: the DPP mov isn't combinable\n");
      return false;
    }

    if (OldOpndValue->getParent()->getParent() != MovMI.getParent()) {
      LLVM_DEBUG(dbgs() <<
        "  failed: old reg def and mov should be in the same BB\n");
      return false;
    }

    if (OldOpndValue->getImm() == 0) {
      if (MaskAllLanes) {
        assert(!BoundCtrlZero); // by check [1]
        CombBCZ = true;
      }
    } else if (BoundCtrlZero) {
      assert(!MaskAllLanes); // by check [1]
      LLVM_DEBUG(dbgs() <<
        "  failed: old!=0 and bctrl:0 and not all lanes isn't combinable\n");
      return false;
    }
  }

  LLVM_DEBUG(dbgs() << "  old=";
    if (!OldOpndValue)
      dbgs() << "undef";
    else
      dbgs() << *OldOpndValue;
    dbgs() << ", bound_ctrl=" << CombBCZ << '\n');

  // OrigMIs: instructions erased on success; DPPMIs: erased on rollback.
  SmallVector<MachineInstr*, 4> OrigMIs, DPPMIs;
  DenseMap<MachineInstr*, SmallVector<unsigned, 4>> RegSeqWithOpNos;
  auto CombOldVGPR = getRegSubRegPair(*OldOpnd);
  // try to reuse previous old reg if its undefined (IMPLICIT_DEF)
  if (CombBCZ && OldOpndValue) { // CombOldVGPR should be undef
    const TargetRegisterClass *RC = MRI->getRegClass(DPPMovReg);
    CombOldVGPR = RegSubRegPair(
      MRI->createVirtualRegister(RC));
    auto UndefInst = BuildMI(*MovMI.getParent(), MovMI, MovMI.getDebugLoc(),
                             TII->get(AMDGPU::IMPLICIT_DEF), CombOldVGPR.Reg);
    DPPMIs.push_back(UndefInst.getInstr());
  }

  OrigMIs.push_back(&MovMI);
  bool Rollback = true;
  // Worklist of uses still to process; REG_SEQUENCE forwarding (below) can
  // append further uses while iterating.
  SmallVector<MachineOperand*, 16> Uses;

  for (auto &Use : MRI->use_nodbg_operands(DPPMovReg)) {
    Uses.push_back(&Use);
  }

  while (!Uses.empty()) {
    MachineOperand *Use = Uses.pop_back_val();
    Rollback = true;

    auto &OrigMI = *Use->getParent();
    LLVM_DEBUG(dbgs() << "  try: " << OrigMI);

    auto OrigOp = OrigMI.getOpcode();
    if (OrigOp == AMDGPU::REG_SEQUENCE) {
      // Look through REG_SEQUENCE: find the subreg our value feeds and chase
      // the uses of the sequence result with that subreg instead.
      Register FwdReg = OrigMI.getOperand(0).getReg();
      unsigned FwdSubReg = 0;

      if (execMayBeModifiedBeforeAnyUse(*MRI, FwdReg, OrigMI)) {
        LLVM_DEBUG(dbgs() << "  failed: EXEC mask should remain the same"
                             " for all uses\n");
        break;
      }

      // REG_SEQUENCE operands come in (reg, subreg-imm) pairs after the def.
      unsigned OpNo, E = OrigMI.getNumOperands();
      for (OpNo = 1; OpNo < E; OpNo += 2) {
        if (OrigMI.getOperand(OpNo).getReg() == DPPMovReg) {
          FwdSubReg = OrigMI.getOperand(OpNo + 1).getImm();
          break;
        }
      }

      if (!FwdSubReg)
        break;

      for (auto &Op : MRI->use_nodbg_operands(FwdReg)) {
        if (Op.getSubReg() == FwdSubReg)
          Uses.push_back(&Op);
      }
      // Remember which operand to mark undef if the combine succeeds.
      RegSeqWithOpNos[&OrigMI].push_back(OpNo);
      continue;
    }

    if (TII->isVOP3(OrigOp)) {
      if (!TII->hasVALU32BitEncoding(OrigOp)) {
        LLVM_DEBUG(dbgs() << "  failed: VOP3 hasn't e32 equivalent\n");
        break;
      }
      // check if other than abs|neg modifiers are set (opsel for example)
      const int64_t Mask = ~(SISrcMods::ABS | SISrcMods::NEG);
      if (!hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src0_modifiers, 0, Mask) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::src1_modifiers, 0, Mask) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::clamp, 0) ||
          !hasNoImmOrEqual(OrigMI, AMDGPU::OpName::omod, 0)) {
        LLVM_DEBUG(dbgs() << "  failed: VOP3 has non-default modifiers\n");
        break;
      }
    } else if (!TII->isVOP1(OrigOp) && !TII->isVOP2(OrigOp)) {
      LLVM_DEBUG(dbgs() << "  failed: not VOP1/2/3\n");
      break;
    }

    // The DPP value must feed src0, or src1 of a commutable instruction
    // (which is then commuted below).
    auto *Src0 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src0);
    auto *Src1 = TII->getNamedOperand(OrigMI, AMDGPU::OpName::src1);
    if (Use != Src0 && !(Use == Src1 && OrigMI.isCommutable())) { // [1]
      LLVM_DEBUG(dbgs() << "  failed: no suitable operands\n");
      break;
    }

    assert(Src0 && "Src1 without Src0?");
    if (Src1 && Src1->isIdenticalTo(*Src0)) {
      assert(Src1->isReg());
      LLVM_DEBUG(
          dbgs()
          << "  " << OrigMI
          << "  failed: DPP register is used more than once per instruction\n");
      break;
    }

    LLVM_DEBUG(dbgs() << "  combining: " << OrigMI);
    if (Use == Src0) {
      if (auto *DPPInst = createDPPInst(OrigMI, MovMI, CombOldVGPR,
                                        OldOpndValue, CombBCZ)) {
        DPPMIs.push_back(DPPInst);
        Rollback = false;
      }
    } else {
      assert(Use == Src1 && OrigMI.isCommutable()); // by check [1]
      // Commute a throwaway clone so OrigMI stays intact for rollback.
      auto *BB = OrigMI.getParent();
      auto *NewMI = BB->getParent()->CloneMachineInstr(&OrigMI);
      BB->insert(OrigMI, NewMI);
      if (TII->commuteInstruction(*NewMI)) {
        LLVM_DEBUG(dbgs() << "  commuted:  " << *NewMI);
        if (auto *DPPInst = createDPPInst(*NewMI, MovMI, CombOldVGPR,
                                          OldOpndValue, CombBCZ)) {
          DPPMIs.push_back(DPPInst);
          Rollback = false;
        }
      } else
        LLVM_DEBUG(dbgs() << "  failed: cannot be commuted\n");
      NewMI->eraseFromParent();
    }
    if (Rollback)
      break;
    OrigMIs.push_back(&OrigMI);
  }

  // A non-empty worklist means we broke out on a failure.
  Rollback |= !Uses.empty();

  for (auto *MI : *(Rollback? &DPPMIs : &OrigMIs))
    MI->eraseFromParent();

  if (!Rollback) {
    // Clean up REG_SEQUENCEs whose DPP input was consumed: erase them when
    // dead, otherwise mark the now-stale source operands undef.
    for (auto &S : RegSeqWithOpNos) {
      if (MRI->use_nodbg_empty(S.first->getOperand(0).getReg())) {
        S.first->eraseFromParent();
        continue;
      }
      while (!S.second.empty())
        S.first->getOperand(S.second.pop_back_val()).setIsUndef(true);
    }
  }

  return !Rollback;
}
588 
// Pass entry point: scans every block bottom-up for DPP movs and tries to
// combine each into its uses. 64-bit DPP pseudo movs that cannot be combined
// whole are split into two 32-bit halves, and each half is retried.
bool GCNDPPCombine::runOnMachineFunction(MachineFunction &MF) {
  auto &ST = MF.getSubtarget<GCNSubtarget>();
  if (!ST.hasDPP() || skipFunction(MF.getFunction()))
    return false;

  MRI = &MF.getRegInfo();
  TII = ST.getInstrInfo();

  bool Changed = false;
  for (auto &MBB : MF) {
    // Iterate in reverse; the iterator is advanced before processing so that
    // erasing MI (or its already-visited users) doesn't invalidate it.
    for (auto I = MBB.rbegin(), E = MBB.rend(); I != E;) {
      auto &MI = *I++;
      if (MI.getOpcode() == AMDGPU::V_MOV_B32_dpp && combineDPPMov(MI)) {
        Changed = true;
        ++NumDPPMovsCombined;
      } else if (MI.getOpcode() == AMDGPU::V_MOV_B64_DPP_PSEUDO) {
        if (ST.has64BitDPP() && combineDPPMov(MI)) {
          Changed = true;
          ++NumDPPMovsCombined;
        } else {
          // Fall back to splitting the 64-bit mov into two 32-bit halves and
          // try combining each half independently.
          auto Split = TII->expandMovDPP64(MI);
          for (auto M : { Split.first, Split.second }) {
            if (M && combineDPPMov(*M))
              ++NumDPPMovsCombined;
          }
          Changed = true;
        }
      }
    }
  }
  return Changed;
}
621