1 //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/CodeGen/GlobalISel/Combiner.h"
10 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
12 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
13 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
14 #include "llvm/CodeGen/GlobalISel/Utils.h"
15 #include "llvm/CodeGen/MachineDominators.h"
16 #include "llvm/CodeGen/MachineFrameInfo.h"
17 #include "llvm/CodeGen/MachineInstr.h"
18 #include "llvm/CodeGen/MachineRegisterInfo.h"
19 #include "llvm/CodeGen/TargetInstrInfo.h"
20 #include "llvm/CodeGen/TargetLowering.h"
21 #include "llvm/Support/MathExtras.h"
22 #include "llvm/Target/TargetMachine.h"
23 
24 #define DEBUG_TYPE "gi-combiner"
25 
26 using namespace llvm;
27 using namespace MIPatternMatch;
28 
29 // Option to allow testing of the combiner while no targets know about indexed
30 // addressing.
31 static cl::opt<bool>
32     ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
33                        cl::desc("Force all indexed operations to be "
34                                 "legal for the GlobalISel combiner"));
35 
36 
37 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
38                                MachineIRBuilder &B, GISelKnownBits *KB,
39                                MachineDominatorTree *MDT)
40     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
41       KB(KB), MDT(MDT) {
42   (void)this->KB;
43 }
44 
45 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
46                                     Register ToReg) const {
47   Observer.changingAllUsesOfReg(MRI, FromReg);
48 
49   if (MRI.constrainRegAttrs(ToReg, FromReg))
50     MRI.replaceRegWith(FromReg, ToReg);
51   else
52     Builder.buildCopy(ToReg, FromReg);
53 
54   Observer.finishedChangingAllUsesOfReg();
55 }
56 
57 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
58                                       MachineOperand &FromRegOp,
59                                       Register ToReg) const {
60   assert(FromRegOp.getParent() && "Expected an operand in an MI");
61   Observer.changingInstr(*FromRegOp.getParent());
62 
63   FromRegOp.setReg(ToReg);
64 
65   Observer.changedInstr(*FromRegOp.getParent());
66 }
67 
68 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
69   if (matchCombineCopy(MI)) {
70     applyCombineCopy(MI);
71     return true;
72   }
73   return false;
74 }
75 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
76   if (MI.getOpcode() != TargetOpcode::COPY)
77     return false;
78   Register DstReg = MI.getOperand(0).getReg();
79   Register SrcReg = MI.getOperand(1).getReg();
80   return canReplaceReg(DstReg, SrcReg, MRI);
81 }
82 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
83   Register DstReg = MI.getOperand(0).getReg();
84   Register SrcReg = MI.getOperand(1).getReg();
85   MI.eraseFromParent();
86   replaceRegWith(MRI, DstReg, SrcReg);
87 }
88 
89 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
90   bool IsUndef = false;
91   SmallVector<Register, 4> Ops;
92   if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
93     applyCombineConcatVectors(MI, IsUndef, Ops);
94     return true;
95   }
96   return false;
97 }
98 
99 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
100                                                SmallVectorImpl<Register> &Ops) {
101   assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
102          "Invalid instruction");
103   IsUndef = true;
104   MachineInstr *Undef = nullptr;
105 
106   // Walk over all the operands of concat vectors and check if they are
107   // build_vector themselves or undef.
108   // Then collect their operands in Ops.
109   for (const MachineOperand &MO : MI.uses()) {
110     Register Reg = MO.getReg();
111     MachineInstr *Def = MRI.getVRegDef(Reg);
112     assert(Def && "Operand not defined");
113     switch (Def->getOpcode()) {
114     case TargetOpcode::G_BUILD_VECTOR:
115       IsUndef = false;
116       // Remember the operands of the build_vector to fold
117       // them into the yet-to-build flattened concat vectors.
118       for (const MachineOperand &BuildVecMO : Def->uses())
119         Ops.push_back(BuildVecMO.getReg());
120       break;
121     case TargetOpcode::G_IMPLICIT_DEF: {
122       LLT OpType = MRI.getType(Reg);
123       // Keep one undef value for all the undef operands.
124       if (!Undef) {
125         Builder.setInsertPt(*MI.getParent(), MI);
126         Undef = Builder.buildUndef(OpType.getScalarType());
127       }
128       assert(MRI.getType(Undef->getOperand(0).getReg()) ==
129                  OpType.getScalarType() &&
130              "All undefs should have the same type");
131       // Break the undef vector in as many scalar elements as needed
132       // for the flattening.
133       for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
134            EltIdx != EltEnd; ++EltIdx)
135         Ops.push_back(Undef->getOperand(0).getReg());
136       break;
137     }
138     default:
139       return false;
140     }
141   }
142   return true;
143 }
144 void CombinerHelper::applyCombineConcatVectors(
145     MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
146   // We determined that the concat_vectors can be flatten.
147   // Generate the flattened build_vector.
148   Register DstReg = MI.getOperand(0).getReg();
149   Builder.setInsertPt(*MI.getParent(), MI);
150   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
151 
152   // Note: IsUndef is sort of redundant. We could have determine it by
153   // checking that at all Ops are undef.  Alternatively, we could have
154   // generate a build_vector of undefs and rely on another combine to
155   // clean that up.  For now, given we already gather this information
156   // in tryCombineConcatVectors, just save compile time and issue the
157   // right thing.
158   if (IsUndef)
159     Builder.buildUndef(NewDstReg);
160   else
161     Builder.buildBuildVector(NewDstReg, Ops);
162   MI.eraseFromParent();
163   replaceRegWith(MRI, DstReg, NewDstReg);
164 }
165 
166 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
167   SmallVector<Register, 4> Ops;
168   if (matchCombineShuffleVector(MI, Ops)) {
169     applyCombineShuffleVector(MI, Ops);
170     return true;
171   }
172   return false;
173 }
174 
175 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
176                                                SmallVectorImpl<Register> &Ops) {
177   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
178          "Invalid instruction kind");
179   LLT DstType = MRI.getType(MI.getOperand(0).getReg());
180   Register Src1 = MI.getOperand(1).getReg();
181   LLT SrcType = MRI.getType(Src1);
182   // As bizarre as it may look, shuffle vector can actually produce
183   // scalar! This is because at the IR level a <1 x ty> shuffle
184   // vector is perfectly valid.
185   unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
186   unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
187 
188   // If the resulting vector is smaller than the size of the source
189   // vectors being concatenated, we won't be able to replace the
190   // shuffle vector into a concat_vectors.
191   //
192   // Note: We may still be able to produce a concat_vectors fed by
193   //       extract_vector_elt and so on. It is less clear that would
194   //       be better though, so don't bother for now.
195   //
196   // If the destination is a scalar, the size of the sources doesn't
197   // matter. we will lower the shuffle to a plain copy. This will
198   // work only if the source and destination have the same size. But
199   // that's covered by the next condition.
200   //
201   // TODO: If the size between the source and destination don't match
202   //       we could still emit an extract vector element in that case.
203   if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
204     return false;
205 
206   // Check that the shuffle mask can be broken evenly between the
207   // different sources.
208   if (DstNumElts % SrcNumElts != 0)
209     return false;
210 
211   // Mask length is a multiple of the source vector length.
212   // Check if the shuffle is some kind of concatenation of the input
213   // vectors.
214   unsigned NumConcat = DstNumElts / SrcNumElts;
215   SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
216   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
217   for (unsigned i = 0; i != DstNumElts; ++i) {
218     int Idx = Mask[i];
219     // Undef value.
220     if (Idx < 0)
221       continue;
222     // Ensure the indices in each SrcType sized piece are sequential and that
223     // the same source is used for the whole piece.
224     if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
225         (ConcatSrcs[i / SrcNumElts] >= 0 &&
226          ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
227       return false;
228     // Remember which source this index came from.
229     ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
230   }
231 
232   // The shuffle is concatenating multiple vectors together.
233   // Collect the different operands for that.
234   Register UndefReg;
235   Register Src2 = MI.getOperand(2).getReg();
236   for (auto Src : ConcatSrcs) {
237     if (Src < 0) {
238       if (!UndefReg) {
239         Builder.setInsertPt(*MI.getParent(), MI);
240         UndefReg = Builder.buildUndef(SrcType).getReg(0);
241       }
242       Ops.push_back(UndefReg);
243     } else if (Src == 0)
244       Ops.push_back(Src1);
245     else
246       Ops.push_back(Src2);
247   }
248   return true;
249 }
250 
251 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
252                                                const ArrayRef<Register> Ops) {
253   Register DstReg = MI.getOperand(0).getReg();
254   Builder.setInsertPt(*MI.getParent(), MI);
255   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
256 
257   if (Ops.size() == 1)
258     Builder.buildCopy(NewDstReg, Ops[0]);
259   else
260     Builder.buildMerge(NewDstReg, Ops);
261 
262   MI.eraseFromParent();
263   replaceRegWith(MRI, DstReg, NewDstReg);
264 }
265 
266 namespace {
267 
268 /// Select a preference between two uses. CurrentUse is the current preference
269 /// while *ForCandidate is attributes of the candidate under consideration.
270 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
271                                   const LLT TyForCandidate,
272                                   unsigned OpcodeForCandidate,
273                                   MachineInstr *MIForCandidate) {
274   if (!CurrentUse.Ty.isValid()) {
275     if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
276         CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
277       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
278     return CurrentUse;
279   }
280 
281   // We permit the extend to hoist through basic blocks but this is only
282   // sensible if the target has extending loads. If you end up lowering back
283   // into a load and extend during the legalizer then the end result is
284   // hoisting the extend up to the load.
285 
286   // Prefer defined extensions to undefined extensions as these are more
287   // likely to reduce the number of instructions.
288   if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
289       CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
290     return CurrentUse;
291   else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
292            OpcodeForCandidate != TargetOpcode::G_ANYEXT)
293     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
294 
295   // Prefer sign extensions to zero extensions as sign-extensions tend to be
296   // more expensive.
297   if (CurrentUse.Ty == TyForCandidate) {
298     if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
299         OpcodeForCandidate == TargetOpcode::G_ZEXT)
300       return CurrentUse;
301     else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
302              OpcodeForCandidate == TargetOpcode::G_SEXT)
303       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
304   }
305 
306   // This is potentially target specific. We've chosen the largest type
307   // because G_TRUNC is usually free. One potential catch with this is that
308   // some targets have a reduced number of larger registers than smaller
309   // registers and this choice potentially increases the live-range for the
310   // larger value.
311   if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
312     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
313   }
314   return CurrentUse;
315 }
316 
317 /// Find a suitable place to insert some instructions and insert them. This
318 /// function accounts for special cases like inserting before a PHI node.
319 /// The current strategy for inserting before PHI's is to duplicate the
320 /// instructions for each predecessor. However, while that's ok for G_TRUNC
321 /// on most targets since it generally requires no code, other targets/cases may
322 /// want to try harder to find a dominating block.
323 static void InsertInsnsWithoutSideEffectsBeforeUse(
324     MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
325     std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
326                        MachineOperand &UseMO)>
327         Inserter) {
328   MachineInstr &UseMI = *UseMO.getParent();
329 
330   MachineBasicBlock *InsertBB = UseMI.getParent();
331 
332   // If the use is a PHI then we want the predecessor block instead.
333   if (UseMI.isPHI()) {
334     MachineOperand *PredBB = std::next(&UseMO);
335     InsertBB = PredBB->getMBB();
336   }
337 
338   // If the block is the same block as the def then we want to insert just after
339   // the def instead of at the start of the block.
340   if (InsertBB == DefMI.getParent()) {
341     MachineBasicBlock::iterator InsertPt = &DefMI;
342     Inserter(InsertBB, std::next(InsertPt), UseMO);
343     return;
344   }
345 
346   // Otherwise we want the start of the BB
347   Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
348 }
349 } // end anonymous namespace
350 
351 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
352   PreferredTuple Preferred;
353   if (matchCombineExtendingLoads(MI, Preferred)) {
354     applyCombineExtendingLoads(MI, Preferred);
355     return true;
356   }
357   return false;
358 }
359 
360 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
361                                                 PreferredTuple &Preferred) {
362   // We match the loads and follow the uses to the extend instead of matching
363   // the extends and following the def to the load. This is because the load
364   // must remain in the same position for correctness (unless we also add code
365   // to find a safe place to sink it) whereas the extend is freely movable.
366   // It also prevents us from duplicating the load for the volatile case or just
367   // for performance.
368 
369   if (MI.getOpcode() != TargetOpcode::G_LOAD &&
370       MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
371       MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
372     return false;
373 
374   auto &LoadValue = MI.getOperand(0);
375   assert(LoadValue.isReg() && "Result wasn't a register?");
376 
377   LLT LoadValueTy = MRI.getType(LoadValue.getReg());
378   if (!LoadValueTy.isScalar())
379     return false;
380 
381   // Most architectures are going to legalize <s8 loads into at least a 1 byte
382   // load, and the MMOs can only describe memory accesses in multiples of bytes.
383   // If we try to perform extload combining on those, we can end up with
384   // %a(s8) = extload %ptr (load 1 byte from %ptr)
385   // ... which is an illegal extload instruction.
386   if (LoadValueTy.getSizeInBits() < 8)
387     return false;
388 
389   // For non power-of-2 types, they will very likely be legalized into multiple
390   // loads. Don't bother trying to match them into extending loads.
391   if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
392     return false;
393 
394   // Find the preferred type aside from the any-extends (unless it's the only
395   // one) and non-extending ops. We'll emit an extending load to that type and
396   // and emit a variant of (extend (trunc X)) for the others according to the
397   // relative type sizes. At the same time, pick an extend to use based on the
398   // extend involved in the chosen type.
399   unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
400                                  ? TargetOpcode::G_ANYEXT
401                                  : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
402                                        ? TargetOpcode::G_SEXT
403                                        : TargetOpcode::G_ZEXT;
404   Preferred = {LLT(), PreferredOpcode, nullptr};
405   for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
406     if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
407         UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
408         UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
409       Preferred = ChoosePreferredUse(Preferred,
410                                      MRI.getType(UseMI.getOperand(0).getReg()),
411                                      UseMI.getOpcode(), &UseMI);
412     }
413   }
414 
415   // There were no extends
416   if (!Preferred.MI)
417     return false;
418   // It should be impossible to chose an extend without selecting a different
419   // type since by definition the result of an extend is larger.
420   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
421 
422   LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
423   return true;
424 }
425 
426 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
427                                                 PreferredTuple &Preferred) {
428   // Rewrite the load to the chosen extending load.
429   Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
430 
431   // Inserter to insert a truncate back to the original type at a given point
432   // with some basic CSE to limit truncate duplication to one per BB.
433   DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
434   auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
435                            MachineBasicBlock::iterator InsertBefore,
436                            MachineOperand &UseMO) {
437     MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
438     if (PreviouslyEmitted) {
439       Observer.changingInstr(*UseMO.getParent());
440       UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
441       Observer.changedInstr(*UseMO.getParent());
442       return;
443     }
444 
445     Builder.setInsertPt(*InsertIntoBB, InsertBefore);
446     Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
447     MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
448     EmittedInsns[InsertIntoBB] = NewMI;
449     replaceRegOpWith(MRI, UseMO, NewDstReg);
450   };
451 
452   Observer.changingInstr(MI);
453   MI.setDesc(
454       Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
455                                ? TargetOpcode::G_SEXTLOAD
456                                : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
457                                      ? TargetOpcode::G_ZEXTLOAD
458                                      : TargetOpcode::G_LOAD));
459 
460   // Rewrite all the uses to fix up the types.
461   auto &LoadValue = MI.getOperand(0);
462   SmallVector<MachineOperand *, 4> Uses;
463   for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
464     Uses.push_back(&UseMO);
465 
466   for (auto *UseMO : Uses) {
467     MachineInstr *UseMI = UseMO->getParent();
468 
469     // If the extend is compatible with the preferred extend then we should fix
470     // up the type and extend so that it uses the preferred use.
471     if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
472         UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
473       Register UseDstReg = UseMI->getOperand(0).getReg();
474       MachineOperand &UseSrcMO = UseMI->getOperand(1);
475       const LLT UseDstTy = MRI.getType(UseDstReg);
476       if (UseDstReg != ChosenDstReg) {
477         if (Preferred.Ty == UseDstTy) {
478           // If the use has the same type as the preferred use, then merge
479           // the vregs and erase the extend. For example:
480           //    %1:_(s8) = G_LOAD ...
481           //    %2:_(s32) = G_SEXT %1(s8)
482           //    %3:_(s32) = G_ANYEXT %1(s8)
483           //    ... = ... %3(s32)
484           // rewrites to:
485           //    %2:_(s32) = G_SEXTLOAD ...
486           //    ... = ... %2(s32)
487           replaceRegWith(MRI, UseDstReg, ChosenDstReg);
488           Observer.erasingInstr(*UseMO->getParent());
489           UseMO->getParent()->eraseFromParent();
490         } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
491           // If the preferred size is smaller, then keep the extend but extend
492           // from the result of the extending load. For example:
493           //    %1:_(s8) = G_LOAD ...
494           //    %2:_(s32) = G_SEXT %1(s8)
495           //    %3:_(s64) = G_ANYEXT %1(s8)
496           //    ... = ... %3(s64)
497           /// rewrites to:
498           //    %2:_(s32) = G_SEXTLOAD ...
499           //    %3:_(s64) = G_ANYEXT %2:_(s32)
500           //    ... = ... %3(s64)
501           replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
502         } else {
503           // If the preferred size is large, then insert a truncate. For
504           // example:
505           //    %1:_(s8) = G_LOAD ...
506           //    %2:_(s64) = G_SEXT %1(s8)
507           //    %3:_(s32) = G_ZEXT %1(s8)
508           //    ... = ... %3(s32)
509           /// rewrites to:
510           //    %2:_(s64) = G_SEXTLOAD ...
511           //    %4:_(s8) = G_TRUNC %2:_(s32)
512           //    %3:_(s64) = G_ZEXT %2:_(s8)
513           //    ... = ... %3(s64)
514           InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
515                                                  InsertTruncAt);
516         }
517         continue;
518       }
519       // The use is (one of) the uses of the preferred use we chose earlier.
520       // We're going to update the load to def this value later so just erase
521       // the old extend.
522       Observer.erasingInstr(*UseMO->getParent());
523       UseMO->getParent()->eraseFromParent();
524       continue;
525     }
526 
527     // The use isn't an extend. Truncate back to the type we originally loaded.
528     // This is free on many targets.
529     InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
530   }
531 
532   MI.getOperand(0).setReg(ChosenDstReg);
533   Observer.changedInstr(MI);
534 }
535 
536 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
537                                    const MachineInstr &UseMI) {
538   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
539          "shouldn't consider debug uses");
540   assert(DefMI.getParent() == UseMI.getParent());
541   if (&DefMI == &UseMI)
542     return false;
543 
544   // Loop through the basic block until we find one of the instructions.
545   MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
546   for (; &*I != &DefMI && &*I != &UseMI; ++I)
547     return &*I == &DefMI;
548 
549   llvm_unreachable("Block must contain instructions");
550 }
551 
552 bool CombinerHelper::dominates(const MachineInstr &DefMI,
553                                const MachineInstr &UseMI) {
554   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
555          "shouldn't consider debug uses");
556   if (MDT)
557     return MDT->dominates(&DefMI, &UseMI);
558   else if (DefMI.getParent() != UseMI.getParent())
559     return false;
560 
561   return isPredecessor(DefMI, UseMI);
562 }
563 
564 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
565                                             Register &Base, Register &Offset) {
566   auto &MF = *MI.getParent()->getParent();
567   const auto &TLI = *MF.getSubtarget().getTargetLowering();
568 
569 #ifndef NDEBUG
570   unsigned Opcode = MI.getOpcode();
571   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
572          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
573 #endif
574 
575   Base = MI.getOperand(1).getReg();
576   MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
577   if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
578     return false;
579 
580   LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
581 
582   for (auto &Use : MRI.use_nodbg_instructions(Base)) {
583     if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
584       continue;
585 
586     Offset = Use.getOperand(2).getReg();
587     if (!ForceLegalIndexing &&
588         !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
589       LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
590                         << Use);
591       continue;
592     }
593 
594     // Make sure the offset calculation is before the potentially indexed op.
595     // FIXME: we really care about dependency here. The offset calculation might
596     // be movable.
597     MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
598     if (!OffsetDef || !dominates(*OffsetDef, MI)) {
599       LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
600                         << Use);
601       continue;
602     }
603 
604     // FIXME: check whether all uses of Base are load/store with foldable
605     // addressing modes. If so, using the normal addr-modes is better than
606     // forming an indexed one.
607 
608     bool MemOpDominatesAddrUses = true;
609     for (auto &PtrAddUse :
610          MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
611       if (!dominates(MI, PtrAddUse)) {
612         MemOpDominatesAddrUses = false;
613         break;
614       }
615     }
616 
617     if (!MemOpDominatesAddrUses) {
618       LLVM_DEBUG(
619           dbgs() << "    Ignoring candidate as memop does not dominate uses: "
620                  << Use);
621       continue;
622     }
623 
624     LLVM_DEBUG(dbgs() << "    Found match: " << Use);
625     Addr = Use.getOperand(0).getReg();
626     return true;
627   }
628 
629   return false;
630 }
631 
632 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
633                                            Register &Base, Register &Offset) {
634   auto &MF = *MI.getParent()->getParent();
635   const auto &TLI = *MF.getSubtarget().getTargetLowering();
636 
637 #ifndef NDEBUG
638   unsigned Opcode = MI.getOpcode();
639   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
640          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
641 #endif
642 
643   Addr = MI.getOperand(1).getReg();
644   MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
645   if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
646     return false;
647 
648   Base = AddrDef->getOperand(1).getReg();
649   Offset = AddrDef->getOperand(2).getReg();
650 
651   LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
652 
653   if (!ForceLegalIndexing &&
654       !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
655     LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
656     return false;
657   }
658 
659   MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
660   if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
661     LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
662     return false;
663   }
664 
665   if (MI.getOpcode() == TargetOpcode::G_STORE) {
666     // Would require a copy.
667     if (Base == MI.getOperand(0).getReg()) {
668       LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
669       return false;
670     }
671 
672     // We're expecting one use of Addr in MI, but it could also be the
673     // value stored, which isn't actually dominated by the instruction.
674     if (MI.getOperand(0).getReg() == Addr) {
675       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
676       return false;
677     }
678   }
679 
680   // FIXME: check whether all uses of the base pointer are constant PtrAdds.
681   // That might allow us to end base's liveness here by adjusting the constant.
682 
683   for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
684     if (!dominates(MI, UseMI)) {
685       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
686       return false;
687     }
688   }
689 
690   return true;
691 }
692 
693 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
694   IndexedLoadStoreMatchInfo MatchInfo;
695   if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
696     applyCombineIndexedLoadStore(MI, MatchInfo);
697     return true;
698   }
699   return false;
700 }
701 
702 bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
703   unsigned Opcode = MI.getOpcode();
704   if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
705       Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
706     return false;
707 
708   MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
709                                           MatchInfo.Offset);
710   if (!MatchInfo.IsPre &&
711       !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
712                               MatchInfo.Offset))
713     return false;
714 
715   return true;
716 }
717 
718 void CombinerHelper::applyCombineIndexedLoadStore(
719     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
720   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
721   MachineIRBuilder MIRBuilder(MI);
722   unsigned Opcode = MI.getOpcode();
723   bool IsStore = Opcode == TargetOpcode::G_STORE;
724   unsigned NewOpcode;
725   switch (Opcode) {
726   case TargetOpcode::G_LOAD:
727     NewOpcode = TargetOpcode::G_INDEXED_LOAD;
728     break;
729   case TargetOpcode::G_SEXTLOAD:
730     NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
731     break;
732   case TargetOpcode::G_ZEXTLOAD:
733     NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
734     break;
735   case TargetOpcode::G_STORE:
736     NewOpcode = TargetOpcode::G_INDEXED_STORE;
737     break;
738   default:
739     llvm_unreachable("Unknown load/store opcode");
740   }
741 
742   auto MIB = MIRBuilder.buildInstr(NewOpcode);
743   if (IsStore) {
744     MIB.addDef(MatchInfo.Addr);
745     MIB.addUse(MI.getOperand(0).getReg());
746   } else {
747     MIB.addDef(MI.getOperand(0).getReg());
748     MIB.addDef(MatchInfo.Addr);
749   }
750 
751   MIB.addUse(MatchInfo.Base);
752   MIB.addUse(MatchInfo.Offset);
753   MIB.addImm(MatchInfo.IsPre);
754   MI.eraseFromParent();
755   AddrDef.eraseFromParent();
756 
757   LLVM_DEBUG(dbgs() << "    Combinined to indexed operation");
758 }
759 
760 bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
761   if (MI.getOpcode() != TargetOpcode::G_BR)
762     return false;
763 
764   // Try to match the following:
765   // bb1:
766   //   %c(s32) = G_ICMP pred, %a, %b
767   //   %c1(s1) = G_TRUNC %c(s32)
768   //   G_BRCOND %c1, %bb2
769   //   G_BR %bb3
770   // bb2:
771   // ...
772   // bb3:
773 
774   // The above pattern does not have a fall through to the successor bb2, always
775   // resulting in a branch no matter which path is taken. Here we try to find
776   // and replace that pattern with conditional branch to bb3 and otherwise
777   // fallthrough to bb2.
778 
779   MachineBasicBlock *MBB = MI.getParent();
780   MachineBasicBlock::iterator BrIt(MI);
781   if (BrIt == MBB->begin())
782     return false;
783   assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
784 
785   MachineInstr *BrCond = &*std::prev(BrIt);
786   if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
787     return false;
788 
789   // Check that the next block is the conditional branch target.
790   if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
791     return false;
792 
793   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
794   if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
795       !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
796     return false;
797   return true;
798 }
799 
800 bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
801   if (!matchElideBrByInvertingCond(MI))
802     return false;
803   applyElideBrByInvertingCond(MI);
804   return true;
805 }
806 
807 void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
808   MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
809   MachineBasicBlock::iterator BrIt(MI);
810   MachineInstr *BrCond = &*std::prev(BrIt);
811   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
812 
813   CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
814       (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
815 
816   // Invert the G_ICMP condition.
817   Observer.changingInstr(*CmpMI);
818   CmpMI->getOperand(1).setPredicate(InversePred);
819   Observer.changedInstr(*CmpMI);
820 
821   // Change the conditional branch target.
822   Observer.changingInstr(*BrCond);
823   BrCond->getOperand(1).setMBB(BrTarget);
824   Observer.changedInstr(*BrCond);
825   MI.eraseFromParent();
826 }
827 
828 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
829   // On Darwin, -Os means optimize for size without hurting performance, so
830   // only really optimize for size when -Oz (MinSize) is used.
831   if (MF.getTarget().getTargetTriple().isOSDarwin())
832     return MF.getFunction().hasMinSize();
833   return MF.getFunction().hasOptSize();
834 }
835 
836 // Returns a list of types to use for memory op lowering in MemOps. A partial
837 // port of findOptimalMemOpLowering in TargetLowering.
838 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
839                                           unsigned Limit, const MemOp &Op,
840                                           unsigned DstAS, unsigned SrcAS,
841                                           const AttributeList &FuncAttributes,
842                                           const TargetLowering &TLI) {
843   if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
844     return false;
845 
846   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
847 
848   if (Ty == LLT()) {
849     // Use the largest scalar type whose alignment constraints are satisfied.
850     // We only need to check DstAlign here as SrcAlign is always greater or
851     // equal to DstAlign (or zero).
852     Ty = LLT::scalar(64);
853     if (Op.isFixedDstAlign())
854       while (Op.getDstAlign() < Ty.getSizeInBytes() &&
855              !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS,
856                                                  Op.getDstAlign().value()))
857         Ty = LLT::scalar(Ty.getSizeInBytes());
858     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
859     // FIXME: check for the largest legal type we can load/store to.
860   }
861 
862   unsigned NumMemOps = 0;
863   uint64_t Size = Op.size();
864   while (Size) {
865     unsigned TySize = Ty.getSizeInBytes();
866     while (TySize > Size) {
867       // For now, only use non-vector load / store's for the left-over pieces.
868       LLT NewTy = Ty;
869       // FIXME: check for mem op safety and legality of the types. Not all of
870       // SDAGisms map cleanly to GISel concepts.
871       if (NewTy.isVector())
872         NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
873       NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
874       unsigned NewTySize = NewTy.getSizeInBytes();
875       assert(NewTySize > 0 && "Could not find appropriate type");
876 
877       // If the new LLT cannot cover all of the remaining bits, then consider
878       // issuing a (or a pair of) unaligned and overlapping load / store.
879       bool Fast;
880       // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
881       MVT VT = getMVTForLLT(Ty);
882       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
883           TLI.allowsMisalignedMemoryAccesses(
884               VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
885               MachineMemOperand::MONone, &Fast) &&
886           Fast)
887         TySize = Size;
888       else {
889         Ty = NewTy;
890         TySize = NewTySize;
891       }
892     }
893 
894     if (++NumMemOps > Limit)
895       return false;
896 
897     MemOps.push_back(Ty);
898     Size -= TySize;
899   }
900 
901   return true;
902 }
903 
904 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
905   if (Ty.isVector())
906     return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
907                            Ty.getNumElements());
908   return IntegerType::get(C, Ty.getSizeInBits());
909 }
910 
911 // Get a vectorized representation of the memset value operand, GISel edition.
912 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
913   MachineRegisterInfo &MRI = *MIB.getMRI();
914   unsigned NumBits = Ty.getScalarSizeInBits();
915   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
916   if (!Ty.isVector() && ValVRegAndVal) {
917     unsigned KnownVal = ValVRegAndVal->Value;
918     APInt Scalar = APInt(8, KnownVal);
919     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
920     return MIB.buildConstant(Ty, SplatVal).getReg(0);
921   }
922   // FIXME: for vector types create a G_BUILD_VECTOR.
923   if (Ty.isVector())
924     return Register();
925 
926   // Extend the byte value to the larger type, and then multiply by a magic
927   // value 0x010101... in order to replicate it across every byte.
928   LLT ExtType = Ty.getScalarType();
929   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
930   if (NumBits > 8) {
931     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
932     auto MagicMI = MIB.buildConstant(ExtType, Magic);
933     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
934   }
935 
936   assert(ExtType == Ty && "Vector memset value type not supported yet");
937   return Val;
938 }
939 
940 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
941                                     Register Val, unsigned KnownLen,
942                                     Align Alignment, bool IsVolatile) {
943   auto &MF = *MI.getParent()->getParent();
944   const auto &TLI = *MF.getSubtarget().getTargetLowering();
945   auto &DL = MF.getDataLayout();
946   LLVMContext &C = MF.getFunction().getContext();
947 
948   assert(KnownLen != 0 && "Have a zero length memset length!");
949 
950   bool DstAlignCanChange = false;
951   MachineFrameInfo &MFI = MF.getFrameInfo();
952   bool OptSize = shouldLowerMemFuncForSize(MF);
953 
954   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
955   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
956     DstAlignCanChange = true;
957 
958   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
959   std::vector<LLT> MemOps;
960 
961   const auto &DstMMO = **MI.memoperands_begin();
962   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
963 
964   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
965   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
966 
967   if (!findGISelOptimalMemOpLowering(MemOps, Limit,
968                                      MemOp::Set(KnownLen, DstAlignCanChange,
969                                                 Alignment,
970                                                 /*IsZeroMemset=*/IsZeroVal,
971                                                 /*IsVolatile=*/IsVolatile),
972                                      DstPtrInfo.getAddrSpace(), ~0u,
973                                      MF.getFunction().getAttributes(), TLI))
974     return false;
975 
976   if (DstAlignCanChange) {
977     // Get an estimate of the type from the LLT.
978     Type *IRTy = getTypeForLLT(MemOps[0], C);
979     Align NewAlign = DL.getABITypeAlign(IRTy);
980     if (NewAlign > Alignment) {
981       Alignment = NewAlign;
982       unsigned FI = FIDef->getOperand(1).getIndex();
983       // Give the stack frame object a larger alignment if needed.
984       if (MFI.getObjectAlign(FI) < Alignment)
985         MFI.setObjectAlignment(FI, Alignment);
986     }
987   }
988 
989   MachineIRBuilder MIB(MI);
990   // Find the largest store and generate the bit pattern for it.
991   LLT LargestTy = MemOps[0];
992   for (unsigned i = 1; i < MemOps.size(); i++)
993     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
994       LargestTy = MemOps[i];
995 
996   // The memset stored value is always defined as an s8, so in order to make it
997   // work with larger store types we need to repeat the bit pattern across the
998   // wider type.
999   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
1000 
1001   if (!MemSetValue)
1002     return false;
1003 
1004   // Generate the stores. For each store type in the list, we generate the
1005   // matching store of that type to the destination address.
1006   LLT PtrTy = MRI.getType(Dst);
1007   unsigned DstOff = 0;
1008   unsigned Size = KnownLen;
1009   for (unsigned I = 0; I < MemOps.size(); I++) {
1010     LLT Ty = MemOps[I];
1011     unsigned TySize = Ty.getSizeInBytes();
1012     if (TySize > Size) {
1013       // Issuing an unaligned load / store pair that overlaps with the previous
1014       // pair. Adjust the offset accordingly.
1015       assert(I == MemOps.size() - 1 && I != 0);
1016       DstOff -= TySize - Size;
1017     }
1018 
1019     // If this store is smaller than the largest store see whether we can get
1020     // the smaller value for free with a truncate.
1021     Register Value = MemSetValue;
1022     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1023       MVT VT = getMVTForLLT(Ty);
1024       MVT LargestVT = getMVTForLLT(LargestTy);
1025       if (!LargestTy.isVector() && !Ty.isVector() &&
1026           TLI.isTruncateFree(LargestVT, VT))
1027         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1028       else
1029         Value = getMemsetValue(Val, Ty, MIB);
1030       if (!Value)
1031         return false;
1032     }
1033 
1034     auto *StoreMMO =
1035         MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
1036 
1037     Register Ptr = Dst;
1038     if (DstOff != 0) {
1039       auto Offset =
1040           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1041       Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1042     }
1043 
1044     MIB.buildStore(Value, Ptr, *StoreMMO);
1045     DstOff += Ty.getSizeInBytes();
1046     Size -= TySize;
1047   }
1048 
1049   MI.eraseFromParent();
1050   return true;
1051 }
1052 
1053 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1054                                     Register Src, unsigned KnownLen,
1055                                     Align DstAlign, Align SrcAlign,
1056                                     bool IsVolatile) {
1057   auto &MF = *MI.getParent()->getParent();
1058   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1059   auto &DL = MF.getDataLayout();
1060   LLVMContext &C = MF.getFunction().getContext();
1061 
1062   assert(KnownLen != 0 && "Have a zero length memcpy length!");
1063 
1064   bool DstAlignCanChange = false;
1065   MachineFrameInfo &MFI = MF.getFrameInfo();
1066   bool OptSize = shouldLowerMemFuncForSize(MF);
1067   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1068 
1069   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1070   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1071     DstAlignCanChange = true;
1072 
1073   // FIXME: infer better src pointer alignment like SelectionDAG does here.
1074   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1075   // if the memcpy is in a tail call position.
1076 
1077   unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1078   std::vector<LLT> MemOps;
1079 
1080   const auto &DstMMO = **MI.memoperands_begin();
1081   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1082   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1083   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1084 
1085   if (!findGISelOptimalMemOpLowering(
1086           MemOps, Limit,
1087           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1088                       IsVolatile),
1089           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1090           MF.getFunction().getAttributes(), TLI))
1091     return false;
1092 
1093   if (DstAlignCanChange) {
1094     // Get an estimate of the type from the LLT.
1095     Type *IRTy = getTypeForLLT(MemOps[0], C);
1096     Align NewAlign = DL.getABITypeAlign(IRTy);
1097 
1098     // Don't promote to an alignment that would require dynamic stack
1099     // realignment.
1100     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1101     if (!TRI->needsStackRealignment(MF))
1102       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1103         NewAlign = NewAlign / 2;
1104 
1105     if (NewAlign > Alignment) {
1106       Alignment = NewAlign;
1107       unsigned FI = FIDef->getOperand(1).getIndex();
1108       // Give the stack frame object a larger alignment if needed.
1109       if (MFI.getObjectAlign(FI) < Alignment)
1110         MFI.setObjectAlignment(FI, Alignment);
1111     }
1112   }
1113 
1114   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1115 
1116   MachineIRBuilder MIB(MI);
1117   // Now we need to emit a pair of load and stores for each of the types we've
1118   // collected. I.e. for each type, generate a load from the source pointer of
1119   // that type width, and then generate a corresponding store to the dest buffer
1120   // of that value loaded. This can result in a sequence of loads and stores
1121   // mixed types, depending on what the target specifies as good types to use.
1122   unsigned CurrOffset = 0;
1123   LLT PtrTy = MRI.getType(Src);
1124   unsigned Size = KnownLen;
1125   for (auto CopyTy : MemOps) {
1126     // Issuing an unaligned load / store pair  that overlaps with the previous
1127     // pair. Adjust the offset accordingly.
1128     if (CopyTy.getSizeInBytes() > Size)
1129       CurrOffset -= CopyTy.getSizeInBytes() - Size;
1130 
1131     // Construct MMOs for the accesses.
1132     auto *LoadMMO =
1133         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1134     auto *StoreMMO =
1135         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1136 
1137     // Create the load.
1138     Register LoadPtr = Src;
1139     Register Offset;
1140     if (CurrOffset != 0) {
1141       Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1142                    .getReg(0);
1143       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1144     }
1145     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1146 
1147     // Create the store.
1148     Register StorePtr =
1149         CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1150     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1151     CurrOffset += CopyTy.getSizeInBytes();
1152     Size -= CopyTy.getSizeInBytes();
1153   }
1154 
1155   MI.eraseFromParent();
1156   return true;
1157 }
1158 
1159 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1160                                      Register Src, unsigned KnownLen,
1161                                      Align DstAlign, Align SrcAlign,
1162                                      bool IsVolatile) {
1163   auto &MF = *MI.getParent()->getParent();
1164   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1165   auto &DL = MF.getDataLayout();
1166   LLVMContext &C = MF.getFunction().getContext();
1167 
1168   assert(KnownLen != 0 && "Have a zero length memmove length!");
1169 
1170   bool DstAlignCanChange = false;
1171   MachineFrameInfo &MFI = MF.getFrameInfo();
1172   bool OptSize = shouldLowerMemFuncForSize(MF);
1173   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1174 
1175   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1176   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1177     DstAlignCanChange = true;
1178 
1179   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1180   std::vector<LLT> MemOps;
1181 
1182   const auto &DstMMO = **MI.memoperands_begin();
1183   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1184   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1185   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1186 
1187   // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
1188   // to a bug in it's findOptimalMemOpLowering implementation. For now do the
1189   // same thing here.
1190   if (!findGISelOptimalMemOpLowering(
1191           MemOps, Limit,
1192           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1193                       /*IsVolatile*/ true),
1194           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1195           MF.getFunction().getAttributes(), TLI))
1196     return false;
1197 
1198   if (DstAlignCanChange) {
1199     // Get an estimate of the type from the LLT.
1200     Type *IRTy = getTypeForLLT(MemOps[0], C);
1201     Align NewAlign = DL.getABITypeAlign(IRTy);
1202 
1203     // Don't promote to an alignment that would require dynamic stack
1204     // realignment.
1205     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1206     if (!TRI->needsStackRealignment(MF))
1207       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1208         NewAlign = NewAlign / 2;
1209 
1210     if (NewAlign > Alignment) {
1211       Alignment = NewAlign;
1212       unsigned FI = FIDef->getOperand(1).getIndex();
1213       // Give the stack frame object a larger alignment if needed.
1214       if (MFI.getObjectAlign(FI) < Alignment)
1215         MFI.setObjectAlignment(FI, Alignment);
1216     }
1217   }
1218 
1219   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1220 
1221   MachineIRBuilder MIB(MI);
1222   // Memmove requires that we perform the loads first before issuing the stores.
1223   // Apart from that, this loop is pretty much doing the same thing as the
1224   // memcpy codegen function.
1225   unsigned CurrOffset = 0;
1226   LLT PtrTy = MRI.getType(Src);
1227   SmallVector<Register, 16> LoadVals;
1228   for (auto CopyTy : MemOps) {
1229     // Construct MMO for the load.
1230     auto *LoadMMO =
1231         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1232 
1233     // Create the load.
1234     Register LoadPtr = Src;
1235     if (CurrOffset != 0) {
1236       auto Offset =
1237           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1238       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1239     }
1240     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1241     CurrOffset += CopyTy.getSizeInBytes();
1242   }
1243 
1244   CurrOffset = 0;
1245   for (unsigned I = 0; I < MemOps.size(); ++I) {
1246     LLT CopyTy = MemOps[I];
1247     // Now store the values loaded.
1248     auto *StoreMMO =
1249         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1250 
1251     Register StorePtr = Dst;
1252     if (CurrOffset != 0) {
1253       auto Offset =
1254           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1255       StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1256     }
1257     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1258     CurrOffset += CopyTy.getSizeInBytes();
1259   }
1260   MI.eraseFromParent();
1261   return true;
1262 }
1263 
1264 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1265   // This combine is fairly complex so it's not written with a separate
1266   // matcher function.
1267   assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1268   Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
1269   assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
1270           ID == Intrinsic::memset) &&
1271          "Expected a memcpy like intrinsic");
1272 
1273   auto MMOIt = MI.memoperands_begin();
1274   const MachineMemOperand *MemOp = *MMOIt;
1275   bool IsVolatile = MemOp->isVolatile();
1276   // Don't try to optimize volatile.
1277   if (IsVolatile)
1278     return false;
1279 
1280   Align DstAlign = MemOp->getBaseAlign();
1281   Align SrcAlign;
1282   Register Dst = MI.getOperand(1).getReg();
1283   Register Src = MI.getOperand(2).getReg();
1284   Register Len = MI.getOperand(3).getReg();
1285 
1286   if (ID != Intrinsic::memset) {
1287     assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1288     MemOp = *(++MMOIt);
1289     SrcAlign = MemOp->getBaseAlign();
1290   }
1291 
1292   // See if this is a constant length copy
1293   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1294   if (!LenVRegAndVal)
1295     return false; // Leave it to the legalizer to lower it to a libcall.
1296   unsigned KnownLen = LenVRegAndVal->Value;
1297 
1298   if (KnownLen == 0) {
1299     MI.eraseFromParent();
1300     return true;
1301   }
1302 
1303   if (MaxLen && KnownLen > MaxLen)
1304     return false;
1305 
1306   if (ID == Intrinsic::memcpy)
1307     return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1308   if (ID == Intrinsic::memmove)
1309     return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1310   if (ID == Intrinsic::memset)
1311     return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1312   return false;
1313 }
1314 
1315 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1316                                            PtrAddChain &MatchInfo) {
1317   // We're trying to match the following pattern:
1318   //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1319   //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
1320   // -->
1321   //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1322 
1323   if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1324     return false;
1325 
1326   Register Add2 = MI.getOperand(1).getReg();
1327   Register Imm1 = MI.getOperand(2).getReg();
1328   auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1329   if (!MaybeImmVal)
1330     return false;
1331 
1332   MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
1333   if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1334     return false;
1335 
1336   Register Base = Add2Def->getOperand(1).getReg();
1337   Register Imm2 = Add2Def->getOperand(2).getReg();
1338   auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1339   if (!MaybeImm2Val)
1340     return false;
1341 
1342   // Pass the combined immediate to the apply function.
1343   MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
1344   MatchInfo.Base = Base;
1345   return true;
1346 }
1347 
1348 bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1349                                            PtrAddChain &MatchInfo) {
1350   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1351   MachineIRBuilder MIB(MI);
1352   LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1353   auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1354   Observer.changingInstr(MI);
1355   MI.getOperand(1).setReg(MatchInfo.Base);
1356   MI.getOperand(2).setReg(NewOffset.getReg(0));
1357   Observer.changedInstr(MI);
1358   return true;
1359 }
1360 
1361 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1362                                           unsigned &ShiftVal) {
1363   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1364   auto MaybeImmVal =
1365       getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1366   if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
1367     return false;
1368   ShiftVal = Log2_64(MaybeImmVal->Value);
1369   return true;
1370 }
1371 
1372 bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1373                                           unsigned &ShiftVal) {
1374   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1375   MachineIRBuilder MIB(MI);
1376   LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1377   auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1378   Observer.changingInstr(MI);
1379   MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1380   MI.getOperand(2).setReg(ShiftCst.getReg(0));
1381   Observer.changedInstr(MI);
1382   return true;
1383 }
1384 
1385 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
1386                                                 unsigned TargetShiftSize,
1387                                                 unsigned &ShiftVal) {
1388   assert((MI.getOpcode() == TargetOpcode::G_SHL ||
1389           MI.getOpcode() == TargetOpcode::G_LSHR ||
1390           MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
1391 
1392   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1393   if (Ty.isVector()) // TODO:
1394     return false;
1395 
1396   // Don't narrow further than the requested size.
1397   unsigned Size = Ty.getSizeInBits();
1398   if (Size <= TargetShiftSize)
1399     return false;
1400 
1401   auto MaybeImmVal =
1402     getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1403   if (!MaybeImmVal)
1404     return false;
1405 
1406   ShiftVal = MaybeImmVal->Value;
1407   return ShiftVal >= Size / 2 && ShiftVal < Size;
1408 }
1409 
1410 bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
1411                                                 const unsigned &ShiftVal) {
1412   Register DstReg = MI.getOperand(0).getReg();
1413   Register SrcReg = MI.getOperand(1).getReg();
1414   LLT Ty = MRI.getType(SrcReg);
1415   unsigned Size = Ty.getSizeInBits();
1416   unsigned HalfSize = Size / 2;
1417   assert(ShiftVal >= HalfSize);
1418 
1419   LLT HalfTy = LLT::scalar(HalfSize);
1420 
1421   Builder.setInstr(MI);
1422   auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
1423   unsigned NarrowShiftAmt = ShiftVal - HalfSize;
1424 
1425   if (MI.getOpcode() == TargetOpcode::G_LSHR) {
1426     Register Narrowed = Unmerge.getReg(1);
1427 
1428     //  dst = G_LSHR s64:x, C for C >= 32
1429     // =>
1430     //   lo, hi = G_UNMERGE_VALUES x
1431     //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
1432 
1433     if (NarrowShiftAmt != 0) {
1434       Narrowed = Builder.buildLShr(HalfTy, Narrowed,
1435         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
1436     }
1437 
1438     auto Zero = Builder.buildConstant(HalfTy, 0);
1439     Builder.buildMerge(DstReg, { Narrowed, Zero });
1440   } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
1441     Register Narrowed = Unmerge.getReg(0);
1442     //  dst = G_SHL s64:x, C for C >= 32
1443     // =>
1444     //   lo, hi = G_UNMERGE_VALUES x
1445     //   dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
1446     if (NarrowShiftAmt != 0) {
1447       Narrowed = Builder.buildShl(HalfTy, Narrowed,
1448         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
1449     }
1450 
1451     auto Zero = Builder.buildConstant(HalfTy, 0);
1452     Builder.buildMerge(DstReg, { Zero, Narrowed });
1453   } else {
1454     assert(MI.getOpcode() == TargetOpcode::G_ASHR);
1455     auto Hi = Builder.buildAShr(
1456       HalfTy, Unmerge.getReg(1),
1457       Builder.buildConstant(HalfTy, HalfSize - 1));
1458 
1459     if (ShiftVal == HalfSize) {
1460       // (G_ASHR i64:x, 32) ->
1461       //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
1462       Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
1463     } else if (ShiftVal == Size - 1) {
1464       // Don't need a second shift.
1465       // (G_ASHR i64:x, 63) ->
1466       //   %narrowed = (G_ASHR hi_32(x), 31)
1467       //   G_MERGE_VALUES %narrowed, %narrowed
1468       Builder.buildMerge(DstReg, { Hi, Hi });
1469     } else {
1470       auto Lo = Builder.buildAShr(
1471         HalfTy, Unmerge.getReg(1),
1472         Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
1473 
1474       // (G_ASHR i64:x, C) ->, for C >= 32
1475       //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
1476       Builder.buildMerge(DstReg, { Lo, Hi });
1477     }
1478   }
1479 
1480   MI.eraseFromParent();
1481   return true;
1482 }
1483 
1484 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
1485                                               unsigned TargetShiftAmount) {
1486   unsigned ShiftAmt;
1487   if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
1488     applyCombineShiftToUnmerge(MI, ShiftAmt);
1489     return true;
1490   }
1491 
1492   return false;
1493 }
1494 
1495 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
1496   return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1497     return MO.isReg() &&
1498            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
1499   });
1500 }
1501 
1502 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
1503   return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1504     return !MO.isReg() ||
1505            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
1506   });
1507 }
1508 
1509 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
1510   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
1511   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
1512   return all_of(Mask, [](int Elt) { return Elt < 0; });
1513 }
1514 
1515 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
1516                                     const MachineOperand &MOP2) {
1517   if (!MOP1.isReg() || !MOP2.isReg())
1518     return false;
1519   MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
1520   if (!I1)
1521     return false;
1522   MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
1523   if (!I2)
1524     return false;
1525 
1526   // Check for physical registers on the instructions first to avoid cases like
1527   // this:
1528   //
1529   // %a = COPY $physreg
1530   // ...
1531   // SOMETHING implicit-def $physreg
1532   // ...
1533   // %b = COPY $physreg
1534   //
1535   // These copies are not equivalent.
1536   if (any_of(I1->uses(), [](const MachineOperand &MO) {
1537         return MO.isReg() && MO.getReg().isPhysical();
1538       })) {
1539     // Check if we have a case like this:
1540     //
1541     // %a = COPY $physreg
1542     // %b = COPY %a
1543     //
1544     // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
1545     // From that, we know that they must have the same value, since they must
1546     // have come from the same COPY.
1547     return I1->isIdenticalTo(*I2);
1548   }
1549 
1550   // We don't have any physical registers, so we don't necessarily need the
1551   // same vreg defs.
1552   //
1553   // On the off-chance that there's some target instruction feeding into the
1554   // instruction, let's use produceSameValue instead of isIdenticalTo.
1555   return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
1556 }
1557 
1558 bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
1559   if (!MOP.isReg())
1560     return false;
1561   int64_t Cst;
1562   return mi_match(MOP.getReg(), MRI, m_ICst(Cst)) && Cst == C;
1563 }
1564 
1565 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
1566                                                      unsigned OpIdx) {
1567   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
1568   Register OldReg = MI.getOperand(0).getReg();
1569   Register Replacement = MI.getOperand(OpIdx).getReg();
1570   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
1571   MI.eraseFromParent();
1572   replaceRegWith(MRI, OldReg, Replacement);
1573   return true;
1574 }
1575 
1576 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
1577   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
1578   // Match (cond ? x : x)
1579   return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
1580          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
1581                        MRI);
1582 }
1583 
1584 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
1585   return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
1586          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
1587                        MRI);
1588 }
1589 
1590 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
1591   assert(MI.getNumDefs() == 1 && "Expected only one def?");
1592   Builder.setInstr(MI);
1593   Builder.buildFConstant(MI.getOperand(0), C);
1594   MI.eraseFromParent();
1595   return true;
1596 }
1597 
1598 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
1599   assert(MI.getNumDefs() == 1 && "Expected only one def?");
1600   Builder.setInstr(MI);
1601   Builder.buildConstant(MI.getOperand(0), C);
1602   MI.eraseFromParent();
1603   return true;
1604 }
1605 
1606 bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
1607   assert(MI.getNumDefs() == 1 && "Expected only one def?");
1608   Builder.setInstr(MI);
1609   Builder.buildUndef(MI.getOperand(0));
1610   MI.eraseFromParent();
1611   return true;
1612 }
1613 
1614 bool CombinerHelper::tryCombine(MachineInstr &MI) {
1615   if (tryCombineCopy(MI))
1616     return true;
1617   if (tryCombineExtendingLoads(MI))
1618     return true;
1619   if (tryCombineIndexedLoadStore(MI))
1620     return true;
1621   return false;
1622 }
1623