1 //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/CodeGen/GlobalISel/Combiner.h"
10 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
12 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
13 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
14 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
15 #include "llvm/CodeGen/GlobalISel/Utils.h"
16 #include "llvm/CodeGen/MachineDominators.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineInstr.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/Support/MathExtras.h"
23 #include "llvm/Target/TargetMachine.h"
24 
25 #define DEBUG_TYPE "gi-combiner"
26 
27 using namespace llvm;
28 using namespace MIPatternMatch;
29 
30 // Option to allow testing of the combiner while no targets know about indexed
31 // addressing.
32 static cl::opt<bool>
33     ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
34                        cl::desc("Force all indexed operations to be "
35                                 "legal for the GlobalISel combiner"));
36 
37 
38 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
39                                MachineIRBuilder &B, GISelKnownBits *KB,
40                                MachineDominatorTree *MDT,
41                                const LegalizerInfo *LI)
42     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
43       KB(KB), MDT(MDT), LI(LI) {
44   (void)this->KB;
45 }
46 
47 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
48                                     Register ToReg) const {
49   Observer.changingAllUsesOfReg(MRI, FromReg);
50 
51   if (MRI.constrainRegAttrs(ToReg, FromReg))
52     MRI.replaceRegWith(FromReg, ToReg);
53   else
54     Builder.buildCopy(ToReg, FromReg);
55 
56   Observer.finishedChangingAllUsesOfReg();
57 }
58 
59 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
60                                       MachineOperand &FromRegOp,
61                                       Register ToReg) const {
62   assert(FromRegOp.getParent() && "Expected an operand in an MI");
63   Observer.changingInstr(*FromRegOp.getParent());
64 
65   FromRegOp.setReg(ToReg);
66 
67   Observer.changedInstr(*FromRegOp.getParent());
68 }
69 
70 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
71   if (matchCombineCopy(MI)) {
72     applyCombineCopy(MI);
73     return true;
74   }
75   return false;
76 }
77 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
78   if (MI.getOpcode() != TargetOpcode::COPY)
79     return false;
80   Register DstReg = MI.getOperand(0).getReg();
81   Register SrcReg = MI.getOperand(1).getReg();
82   return canReplaceReg(DstReg, SrcReg, MRI);
83 }
84 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
85   Register DstReg = MI.getOperand(0).getReg();
86   Register SrcReg = MI.getOperand(1).getReg();
87   MI.eraseFromParent();
88   replaceRegWith(MRI, DstReg, SrcReg);
89 }
90 
91 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
92   bool IsUndef = false;
93   SmallVector<Register, 4> Ops;
94   if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
95     applyCombineConcatVectors(MI, IsUndef, Ops);
96     return true;
97   }
98   return false;
99 }
100 
101 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
102                                                SmallVectorImpl<Register> &Ops) {
103   assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
104          "Invalid instruction");
105   IsUndef = true;
106   MachineInstr *Undef = nullptr;
107 
108   // Walk over all the operands of concat vectors and check if they are
109   // build_vector themselves or undef.
110   // Then collect their operands in Ops.
111   for (const MachineOperand &MO : MI.uses()) {
112     Register Reg = MO.getReg();
113     MachineInstr *Def = MRI.getVRegDef(Reg);
114     assert(Def && "Operand not defined");
115     switch (Def->getOpcode()) {
116     case TargetOpcode::G_BUILD_VECTOR:
117       IsUndef = false;
118       // Remember the operands of the build_vector to fold
119       // them into the yet-to-build flattened concat vectors.
120       for (const MachineOperand &BuildVecMO : Def->uses())
121         Ops.push_back(BuildVecMO.getReg());
122       break;
123     case TargetOpcode::G_IMPLICIT_DEF: {
124       LLT OpType = MRI.getType(Reg);
125       // Keep one undef value for all the undef operands.
126       if (!Undef) {
127         Builder.setInsertPt(*MI.getParent(), MI);
128         Undef = Builder.buildUndef(OpType.getScalarType());
129       }
130       assert(MRI.getType(Undef->getOperand(0).getReg()) ==
131                  OpType.getScalarType() &&
132              "All undefs should have the same type");
133       // Break the undef vector in as many scalar elements as needed
134       // for the flattening.
135       for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
136            EltIdx != EltEnd; ++EltIdx)
137         Ops.push_back(Undef->getOperand(0).getReg());
138       break;
139     }
140     default:
141       return false;
142     }
143   }
144   return true;
145 }
146 void CombinerHelper::applyCombineConcatVectors(
147     MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
148   // We determined that the concat_vectors can be flatten.
149   // Generate the flattened build_vector.
150   Register DstReg = MI.getOperand(0).getReg();
151   Builder.setInsertPt(*MI.getParent(), MI);
152   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
153 
154   // Note: IsUndef is sort of redundant. We could have determine it by
155   // checking that at all Ops are undef.  Alternatively, we could have
156   // generate a build_vector of undefs and rely on another combine to
157   // clean that up.  For now, given we already gather this information
158   // in tryCombineConcatVectors, just save compile time and issue the
159   // right thing.
160   if (IsUndef)
161     Builder.buildUndef(NewDstReg);
162   else
163     Builder.buildBuildVector(NewDstReg, Ops);
164   MI.eraseFromParent();
165   replaceRegWith(MRI, DstReg, NewDstReg);
166 }
167 
168 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
169   SmallVector<Register, 4> Ops;
170   if (matchCombineShuffleVector(MI, Ops)) {
171     applyCombineShuffleVector(MI, Ops);
172     return true;
173   }
174   return false;
175 }
176 
177 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
178                                                SmallVectorImpl<Register> &Ops) {
179   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
180          "Invalid instruction kind");
181   LLT DstType = MRI.getType(MI.getOperand(0).getReg());
182   Register Src1 = MI.getOperand(1).getReg();
183   LLT SrcType = MRI.getType(Src1);
184   // As bizarre as it may look, shuffle vector can actually produce
185   // scalar! This is because at the IR level a <1 x ty> shuffle
186   // vector is perfectly valid.
187   unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
188   unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
189 
190   // If the resulting vector is smaller than the size of the source
191   // vectors being concatenated, we won't be able to replace the
192   // shuffle vector into a concat_vectors.
193   //
194   // Note: We may still be able to produce a concat_vectors fed by
195   //       extract_vector_elt and so on. It is less clear that would
196   //       be better though, so don't bother for now.
197   //
198   // If the destination is a scalar, the size of the sources doesn't
199   // matter. we will lower the shuffle to a plain copy. This will
200   // work only if the source and destination have the same size. But
201   // that's covered by the next condition.
202   //
203   // TODO: If the size between the source and destination don't match
204   //       we could still emit an extract vector element in that case.
205   if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
206     return false;
207 
208   // Check that the shuffle mask can be broken evenly between the
209   // different sources.
210   if (DstNumElts % SrcNumElts != 0)
211     return false;
212 
213   // Mask length is a multiple of the source vector length.
214   // Check if the shuffle is some kind of concatenation of the input
215   // vectors.
216   unsigned NumConcat = DstNumElts / SrcNumElts;
217   SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
218   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
219   for (unsigned i = 0; i != DstNumElts; ++i) {
220     int Idx = Mask[i];
221     // Undef value.
222     if (Idx < 0)
223       continue;
224     // Ensure the indices in each SrcType sized piece are sequential and that
225     // the same source is used for the whole piece.
226     if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
227         (ConcatSrcs[i / SrcNumElts] >= 0 &&
228          ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
229       return false;
230     // Remember which source this index came from.
231     ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
232   }
233 
234   // The shuffle is concatenating multiple vectors together.
235   // Collect the different operands for that.
236   Register UndefReg;
237   Register Src2 = MI.getOperand(2).getReg();
238   for (auto Src : ConcatSrcs) {
239     if (Src < 0) {
240       if (!UndefReg) {
241         Builder.setInsertPt(*MI.getParent(), MI);
242         UndefReg = Builder.buildUndef(SrcType).getReg(0);
243       }
244       Ops.push_back(UndefReg);
245     } else if (Src == 0)
246       Ops.push_back(Src1);
247     else
248       Ops.push_back(Src2);
249   }
250   return true;
251 }
252 
253 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
254                                                const ArrayRef<Register> Ops) {
255   Register DstReg = MI.getOperand(0).getReg();
256   Builder.setInsertPt(*MI.getParent(), MI);
257   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
258 
259   if (Ops.size() == 1)
260     Builder.buildCopy(NewDstReg, Ops[0]);
261   else
262     Builder.buildMerge(NewDstReg, Ops);
263 
264   MI.eraseFromParent();
265   replaceRegWith(MRI, DstReg, NewDstReg);
266 }
267 
268 namespace {
269 
270 /// Select a preference between two uses. CurrentUse is the current preference
271 /// while *ForCandidate is attributes of the candidate under consideration.
272 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
273                                   const LLT TyForCandidate,
274                                   unsigned OpcodeForCandidate,
275                                   MachineInstr *MIForCandidate) {
276   if (!CurrentUse.Ty.isValid()) {
277     if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
278         CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
279       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
280     return CurrentUse;
281   }
282 
283   // We permit the extend to hoist through basic blocks but this is only
284   // sensible if the target has extending loads. If you end up lowering back
285   // into a load and extend during the legalizer then the end result is
286   // hoisting the extend up to the load.
287 
288   // Prefer defined extensions to undefined extensions as these are more
289   // likely to reduce the number of instructions.
290   if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
291       CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
292     return CurrentUse;
293   else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
294            OpcodeForCandidate != TargetOpcode::G_ANYEXT)
295     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
296 
297   // Prefer sign extensions to zero extensions as sign-extensions tend to be
298   // more expensive.
299   if (CurrentUse.Ty == TyForCandidate) {
300     if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
301         OpcodeForCandidate == TargetOpcode::G_ZEXT)
302       return CurrentUse;
303     else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
304              OpcodeForCandidate == TargetOpcode::G_SEXT)
305       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
306   }
307 
308   // This is potentially target specific. We've chosen the largest type
309   // because G_TRUNC is usually free. One potential catch with this is that
310   // some targets have a reduced number of larger registers than smaller
311   // registers and this choice potentially increases the live-range for the
312   // larger value.
313   if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
314     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
315   }
316   return CurrentUse;
317 }
318 
319 /// Find a suitable place to insert some instructions and insert them. This
320 /// function accounts for special cases like inserting before a PHI node.
321 /// The current strategy for inserting before PHI's is to duplicate the
322 /// instructions for each predecessor. However, while that's ok for G_TRUNC
323 /// on most targets since it generally requires no code, other targets/cases may
324 /// want to try harder to find a dominating block.
325 static void InsertInsnsWithoutSideEffectsBeforeUse(
326     MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
327     std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
328                        MachineOperand &UseMO)>
329         Inserter) {
330   MachineInstr &UseMI = *UseMO.getParent();
331 
332   MachineBasicBlock *InsertBB = UseMI.getParent();
333 
334   // If the use is a PHI then we want the predecessor block instead.
335   if (UseMI.isPHI()) {
336     MachineOperand *PredBB = std::next(&UseMO);
337     InsertBB = PredBB->getMBB();
338   }
339 
340   // If the block is the same block as the def then we want to insert just after
341   // the def instead of at the start of the block.
342   if (InsertBB == DefMI.getParent()) {
343     MachineBasicBlock::iterator InsertPt = &DefMI;
344     Inserter(InsertBB, std::next(InsertPt), UseMO);
345     return;
346   }
347 
348   // Otherwise we want the start of the BB
349   Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
350 }
351 } // end anonymous namespace
352 
353 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
354   PreferredTuple Preferred;
355   if (matchCombineExtendingLoads(MI, Preferred)) {
356     applyCombineExtendingLoads(MI, Preferred);
357     return true;
358   }
359   return false;
360 }
361 
362 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
363                                                 PreferredTuple &Preferred) {
364   // We match the loads and follow the uses to the extend instead of matching
365   // the extends and following the def to the load. This is because the load
366   // must remain in the same position for correctness (unless we also add code
367   // to find a safe place to sink it) whereas the extend is freely movable.
368   // It also prevents us from duplicating the load for the volatile case or just
369   // for performance.
370 
371   if (MI.getOpcode() != TargetOpcode::G_LOAD &&
372       MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
373       MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
374     return false;
375 
376   auto &LoadValue = MI.getOperand(0);
377   assert(LoadValue.isReg() && "Result wasn't a register?");
378 
379   LLT LoadValueTy = MRI.getType(LoadValue.getReg());
380   if (!LoadValueTy.isScalar())
381     return false;
382 
383   // Most architectures are going to legalize <s8 loads into at least a 1 byte
384   // load, and the MMOs can only describe memory accesses in multiples of bytes.
385   // If we try to perform extload combining on those, we can end up with
386   // %a(s8) = extload %ptr (load 1 byte from %ptr)
387   // ... which is an illegal extload instruction.
388   if (LoadValueTy.getSizeInBits() < 8)
389     return false;
390 
391   // For non power-of-2 types, they will very likely be legalized into multiple
392   // loads. Don't bother trying to match them into extending loads.
393   if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
394     return false;
395 
396   // Find the preferred type aside from the any-extends (unless it's the only
397   // one) and non-extending ops. We'll emit an extending load to that type and
398   // and emit a variant of (extend (trunc X)) for the others according to the
399   // relative type sizes. At the same time, pick an extend to use based on the
400   // extend involved in the chosen type.
401   unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
402                                  ? TargetOpcode::G_ANYEXT
403                                  : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
404                                        ? TargetOpcode::G_SEXT
405                                        : TargetOpcode::G_ZEXT;
406   Preferred = {LLT(), PreferredOpcode, nullptr};
407   for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
408     if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
409         UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
410         (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
411       // Check for legality.
412       if (LI) {
413         LegalityQuery::MemDesc MMDesc;
414         const auto &MMO = **MI.memoperands_begin();
415         MMDesc.SizeInBits = MMO.getSizeInBits();
416         MMDesc.AlignInBits = MMO.getAlign().value() * 8;
417         MMDesc.Ordering = MMO.getOrdering();
418         LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
419         LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
420         if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
421             LegalizeActions::Legal)
422           continue;
423       }
424       Preferred = ChoosePreferredUse(Preferred,
425                                      MRI.getType(UseMI.getOperand(0).getReg()),
426                                      UseMI.getOpcode(), &UseMI);
427     }
428   }
429 
430   // There were no extends
431   if (!Preferred.MI)
432     return false;
433   // It should be impossible to chose an extend without selecting a different
434   // type since by definition the result of an extend is larger.
435   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
436 
437   LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
438   return true;
439 }
440 
441 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
442                                                 PreferredTuple &Preferred) {
443   // Rewrite the load to the chosen extending load.
444   Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
445 
446   // Inserter to insert a truncate back to the original type at a given point
447   // with some basic CSE to limit truncate duplication to one per BB.
448   DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
449   auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
450                            MachineBasicBlock::iterator InsertBefore,
451                            MachineOperand &UseMO) {
452     MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
453     if (PreviouslyEmitted) {
454       Observer.changingInstr(*UseMO.getParent());
455       UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
456       Observer.changedInstr(*UseMO.getParent());
457       return;
458     }
459 
460     Builder.setInsertPt(*InsertIntoBB, InsertBefore);
461     Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
462     MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
463     EmittedInsns[InsertIntoBB] = NewMI;
464     replaceRegOpWith(MRI, UseMO, NewDstReg);
465   };
466 
467   Observer.changingInstr(MI);
468   MI.setDesc(
469       Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
470                                ? TargetOpcode::G_SEXTLOAD
471                                : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
472                                      ? TargetOpcode::G_ZEXTLOAD
473                                      : TargetOpcode::G_LOAD));
474 
475   // Rewrite all the uses to fix up the types.
476   auto &LoadValue = MI.getOperand(0);
477   SmallVector<MachineOperand *, 4> Uses;
478   for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
479     Uses.push_back(&UseMO);
480 
481   for (auto *UseMO : Uses) {
482     MachineInstr *UseMI = UseMO->getParent();
483 
484     // If the extend is compatible with the preferred extend then we should fix
485     // up the type and extend so that it uses the preferred use.
486     if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
487         UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
488       Register UseDstReg = UseMI->getOperand(0).getReg();
489       MachineOperand &UseSrcMO = UseMI->getOperand(1);
490       const LLT UseDstTy = MRI.getType(UseDstReg);
491       if (UseDstReg != ChosenDstReg) {
492         if (Preferred.Ty == UseDstTy) {
493           // If the use has the same type as the preferred use, then merge
494           // the vregs and erase the extend. For example:
495           //    %1:_(s8) = G_LOAD ...
496           //    %2:_(s32) = G_SEXT %1(s8)
497           //    %3:_(s32) = G_ANYEXT %1(s8)
498           //    ... = ... %3(s32)
499           // rewrites to:
500           //    %2:_(s32) = G_SEXTLOAD ...
501           //    ... = ... %2(s32)
502           replaceRegWith(MRI, UseDstReg, ChosenDstReg);
503           Observer.erasingInstr(*UseMO->getParent());
504           UseMO->getParent()->eraseFromParent();
505         } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
506           // If the preferred size is smaller, then keep the extend but extend
507           // from the result of the extending load. For example:
508           //    %1:_(s8) = G_LOAD ...
509           //    %2:_(s32) = G_SEXT %1(s8)
510           //    %3:_(s64) = G_ANYEXT %1(s8)
511           //    ... = ... %3(s64)
512           /// rewrites to:
513           //    %2:_(s32) = G_SEXTLOAD ...
514           //    %3:_(s64) = G_ANYEXT %2:_(s32)
515           //    ... = ... %3(s64)
516           replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
517         } else {
518           // If the preferred size is large, then insert a truncate. For
519           // example:
520           //    %1:_(s8) = G_LOAD ...
521           //    %2:_(s64) = G_SEXT %1(s8)
522           //    %3:_(s32) = G_ZEXT %1(s8)
523           //    ... = ... %3(s32)
524           /// rewrites to:
525           //    %2:_(s64) = G_SEXTLOAD ...
526           //    %4:_(s8) = G_TRUNC %2:_(s32)
527           //    %3:_(s64) = G_ZEXT %2:_(s8)
528           //    ... = ... %3(s64)
529           InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
530                                                  InsertTruncAt);
531         }
532         continue;
533       }
534       // The use is (one of) the uses of the preferred use we chose earlier.
535       // We're going to update the load to def this value later so just erase
536       // the old extend.
537       Observer.erasingInstr(*UseMO->getParent());
538       UseMO->getParent()->eraseFromParent();
539       continue;
540     }
541 
542     // The use isn't an extend. Truncate back to the type we originally loaded.
543     // This is free on many targets.
544     InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
545   }
546 
547   MI.getOperand(0).setReg(ChosenDstReg);
548   Observer.changedInstr(MI);
549 }
550 
551 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
552                                    const MachineInstr &UseMI) {
553   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
554          "shouldn't consider debug uses");
555   assert(DefMI.getParent() == UseMI.getParent());
556   if (&DefMI == &UseMI)
557     return false;
558 
559   // Loop through the basic block until we find one of the instructions.
560   MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
561   for (; &*I != &DefMI && &*I != &UseMI; ++I)
562     return &*I == &DefMI;
563 
564   llvm_unreachable("Block must contain instructions");
565 }
566 
567 bool CombinerHelper::dominates(const MachineInstr &DefMI,
568                                const MachineInstr &UseMI) {
569   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
570          "shouldn't consider debug uses");
571   if (MDT)
572     return MDT->dominates(&DefMI, &UseMI);
573   else if (DefMI.getParent() != UseMI.getParent())
574     return false;
575 
576   return isPredecessor(DefMI, UseMI);
577 }
578 
579 bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
580   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
581   Register SrcReg = MI.getOperand(1).getReg();
582   Register LoadUser = SrcReg;
583 
584   if (MRI.getType(SrcReg).isVector())
585     return false;
586 
587   Register TruncSrc;
588   if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
589     LoadUser = TruncSrc;
590 
591   uint64_t SizeInBits = MI.getOperand(2).getImm();
592   // If the source is a G_SEXTLOAD from the same bit width, then we don't
593   // need any extend at all, just a truncate.
594   if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
595     const auto &MMO = **LoadMI->memoperands_begin();
596     // If truncating more than the original extended value, abort.
597     if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
598       return false;
599     if (MMO.getSizeInBits() == SizeInBits)
600       return true;
601   }
602   return false;
603 }
604 
605 bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
606   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
607   Builder.setInstrAndDebugLoc(MI);
608   Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
609   MI.eraseFromParent();
610   return true;
611 }
612 
613 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
614                                             Register &Base, Register &Offset) {
615   auto &MF = *MI.getParent()->getParent();
616   const auto &TLI = *MF.getSubtarget().getTargetLowering();
617 
618 #ifndef NDEBUG
619   unsigned Opcode = MI.getOpcode();
620   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
621          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
622 #endif
623 
624   Base = MI.getOperand(1).getReg();
625   MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
626   if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
627     return false;
628 
629   LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
630 
631   for (auto &Use : MRI.use_nodbg_instructions(Base)) {
632     if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
633       continue;
634 
635     Offset = Use.getOperand(2).getReg();
636     if (!ForceLegalIndexing &&
637         !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
638       LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
639                         << Use);
640       continue;
641     }
642 
643     // Make sure the offset calculation is before the potentially indexed op.
644     // FIXME: we really care about dependency here. The offset calculation might
645     // be movable.
646     MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
647     if (!OffsetDef || !dominates(*OffsetDef, MI)) {
648       LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
649                         << Use);
650       continue;
651     }
652 
653     // FIXME: check whether all uses of Base are load/store with foldable
654     // addressing modes. If so, using the normal addr-modes is better than
655     // forming an indexed one.
656 
657     bool MemOpDominatesAddrUses = true;
658     for (auto &PtrAddUse :
659          MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
660       if (!dominates(MI, PtrAddUse)) {
661         MemOpDominatesAddrUses = false;
662         break;
663       }
664     }
665 
666     if (!MemOpDominatesAddrUses) {
667       LLVM_DEBUG(
668           dbgs() << "    Ignoring candidate as memop does not dominate uses: "
669                  << Use);
670       continue;
671     }
672 
673     LLVM_DEBUG(dbgs() << "    Found match: " << Use);
674     Addr = Use.getOperand(0).getReg();
675     return true;
676   }
677 
678   return false;
679 }
680 
681 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
682                                            Register &Base, Register &Offset) {
683   auto &MF = *MI.getParent()->getParent();
684   const auto &TLI = *MF.getSubtarget().getTargetLowering();
685 
686 #ifndef NDEBUG
687   unsigned Opcode = MI.getOpcode();
688   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
689          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
690 #endif
691 
692   Addr = MI.getOperand(1).getReg();
693   MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
694   if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
695     return false;
696 
697   Base = AddrDef->getOperand(1).getReg();
698   Offset = AddrDef->getOperand(2).getReg();
699 
700   LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
701 
702   if (!ForceLegalIndexing &&
703       !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
704     LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
705     return false;
706   }
707 
708   MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
709   if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
710     LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
711     return false;
712   }
713 
714   if (MI.getOpcode() == TargetOpcode::G_STORE) {
715     // Would require a copy.
716     if (Base == MI.getOperand(0).getReg()) {
717       LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
718       return false;
719     }
720 
721     // We're expecting one use of Addr in MI, but it could also be the
722     // value stored, which isn't actually dominated by the instruction.
723     if (MI.getOperand(0).getReg() == Addr) {
724       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
725       return false;
726     }
727   }
728 
729   // FIXME: check whether all uses of the base pointer are constant PtrAdds.
730   // That might allow us to end base's liveness here by adjusting the constant.
731 
732   for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
733     if (!dominates(MI, UseMI)) {
734       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
735       return false;
736     }
737   }
738 
739   return true;
740 }
741 
742 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
743   IndexedLoadStoreMatchInfo MatchInfo;
744   if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
745     applyCombineIndexedLoadStore(MI, MatchInfo);
746     return true;
747   }
748   return false;
749 }
750 
751 bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
752   unsigned Opcode = MI.getOpcode();
753   if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
754       Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
755     return false;
756 
757   MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
758                                           MatchInfo.Offset);
759   if (!MatchInfo.IsPre &&
760       !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
761                               MatchInfo.Offset))
762     return false;
763 
764   return true;
765 }
766 
767 void CombinerHelper::applyCombineIndexedLoadStore(
768     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
769   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
770   MachineIRBuilder MIRBuilder(MI);
771   unsigned Opcode = MI.getOpcode();
772   bool IsStore = Opcode == TargetOpcode::G_STORE;
773   unsigned NewOpcode;
774   switch (Opcode) {
775   case TargetOpcode::G_LOAD:
776     NewOpcode = TargetOpcode::G_INDEXED_LOAD;
777     break;
778   case TargetOpcode::G_SEXTLOAD:
779     NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
780     break;
781   case TargetOpcode::G_ZEXTLOAD:
782     NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
783     break;
784   case TargetOpcode::G_STORE:
785     NewOpcode = TargetOpcode::G_INDEXED_STORE;
786     break;
787   default:
788     llvm_unreachable("Unknown load/store opcode");
789   }
790 
791   auto MIB = MIRBuilder.buildInstr(NewOpcode);
792   if (IsStore) {
793     MIB.addDef(MatchInfo.Addr);
794     MIB.addUse(MI.getOperand(0).getReg());
795   } else {
796     MIB.addDef(MI.getOperand(0).getReg());
797     MIB.addDef(MatchInfo.Addr);
798   }
799 
800   MIB.addUse(MatchInfo.Base);
801   MIB.addUse(MatchInfo.Offset);
802   MIB.addImm(MatchInfo.IsPre);
803   MI.eraseFromParent();
804   AddrDef.eraseFromParent();
805 
806   LLVM_DEBUG(dbgs() << "    Combinined to indexed operation");
807 }
808 
809 bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
810   if (MI.getOpcode() != TargetOpcode::G_BR)
811     return false;
812 
813   // Try to match the following:
814   // bb1:
815   //   %c(s32) = G_ICMP pred, %a, %b
816   //   %c1(s1) = G_TRUNC %c(s32)
817   //   G_BRCOND %c1, %bb2
818   //   G_BR %bb3
819   // bb2:
820   // ...
821   // bb3:
822 
823   // The above pattern does not have a fall through to the successor bb2, always
824   // resulting in a branch no matter which path is taken. Here we try to find
825   // and replace that pattern with conditional branch to bb3 and otherwise
826   // fallthrough to bb2.
827 
828   MachineBasicBlock *MBB = MI.getParent();
829   MachineBasicBlock::iterator BrIt(MI);
830   if (BrIt == MBB->begin())
831     return false;
832   assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
833 
834   MachineInstr *BrCond = &*std::prev(BrIt);
835   if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
836     return false;
837 
838   // Check that the next block is the conditional branch target.
839   if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
840     return false;
841 
842   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
843   if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
844       !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
845     return false;
846   return true;
847 }
848 
849 bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
850   if (!matchElideBrByInvertingCond(MI))
851     return false;
852   applyElideBrByInvertingCond(MI);
853   return true;
854 }
855 
856 void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
857   MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
858   MachineBasicBlock::iterator BrIt(MI);
859   MachineInstr *BrCond = &*std::prev(BrIt);
860   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
861 
862   CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
863       (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
864 
865   // Invert the G_ICMP condition.
866   Observer.changingInstr(*CmpMI);
867   CmpMI->getOperand(1).setPredicate(InversePred);
868   Observer.changedInstr(*CmpMI);
869 
870   // Change the conditional branch target.
871   Observer.changingInstr(*BrCond);
872   BrCond->getOperand(1).setMBB(BrTarget);
873   Observer.changedInstr(*BrCond);
874   MI.eraseFromParent();
875 }
876 
877 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
878   // On Darwin, -Os means optimize for size without hurting performance, so
879   // only really optimize for size when -Oz (MinSize) is used.
880   if (MF.getTarget().getTargetTriple().isOSDarwin())
881     return MF.getFunction().hasMinSize();
882   return MF.getFunction().hasOptSize();
883 }
884 
885 // Returns a list of types to use for memory op lowering in MemOps. A partial
886 // port of findOptimalMemOpLowering in TargetLowering.
887 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
888                                           unsigned Limit, const MemOp &Op,
889                                           unsigned DstAS, unsigned SrcAS,
890                                           const AttributeList &FuncAttributes,
891                                           const TargetLowering &TLI) {
892   if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
893     return false;
894 
895   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
896 
897   if (Ty == LLT()) {
898     // Use the largest scalar type whose alignment constraints are satisfied.
899     // We only need to check DstAlign here as SrcAlign is always greater or
900     // equal to DstAlign (or zero).
901     Ty = LLT::scalar(64);
902     if (Op.isFixedDstAlign())
903       while (Op.getDstAlign() < Ty.getSizeInBytes() &&
904              !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
905         Ty = LLT::scalar(Ty.getSizeInBytes());
906     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
907     // FIXME: check for the largest legal type we can load/store to.
908   }
909 
910   unsigned NumMemOps = 0;
911   uint64_t Size = Op.size();
912   while (Size) {
913     unsigned TySize = Ty.getSizeInBytes();
914     while (TySize > Size) {
915       // For now, only use non-vector load / store's for the left-over pieces.
916       LLT NewTy = Ty;
917       // FIXME: check for mem op safety and legality of the types. Not all of
918       // SDAGisms map cleanly to GISel concepts.
919       if (NewTy.isVector())
920         NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
921       NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
922       unsigned NewTySize = NewTy.getSizeInBytes();
923       assert(NewTySize > 0 && "Could not find appropriate type");
924 
925       // If the new LLT cannot cover all of the remaining bits, then consider
926       // issuing a (or a pair of) unaligned and overlapping load / store.
927       bool Fast;
928       // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
929       MVT VT = getMVTForLLT(Ty);
930       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
931           TLI.allowsMisalignedMemoryAccesses(
932               VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
933               MachineMemOperand::MONone, &Fast) &&
934           Fast)
935         TySize = Size;
936       else {
937         Ty = NewTy;
938         TySize = NewTySize;
939       }
940     }
941 
942     if (++NumMemOps > Limit)
943       return false;
944 
945     MemOps.push_back(Ty);
946     Size -= TySize;
947   }
948 
949   return true;
950 }
951 
952 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
953   if (Ty.isVector())
954     return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
955                                 Ty.getNumElements());
956   return IntegerType::get(C, Ty.getSizeInBits());
957 }
958 
959 // Get a vectorized representation of the memset value operand, GISel edition.
960 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
961   MachineRegisterInfo &MRI = *MIB.getMRI();
962   unsigned NumBits = Ty.getScalarSizeInBits();
963   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
964   if (!Ty.isVector() && ValVRegAndVal) {
965     unsigned KnownVal = ValVRegAndVal->Value;
966     APInt Scalar = APInt(8, KnownVal);
967     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
968     return MIB.buildConstant(Ty, SplatVal).getReg(0);
969   }
970 
971   // Extend the byte value to the larger type, and then multiply by a magic
972   // value 0x010101... in order to replicate it across every byte.
973   // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
974   if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
975     return MIB.buildConstant(Ty, 0).getReg(0);
976   }
977 
978   LLT ExtType = Ty.getScalarType();
979   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
980   if (NumBits > 8) {
981     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
982     auto MagicMI = MIB.buildConstant(ExtType, Magic);
983     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
984   }
985 
986   // For vector types create a G_BUILD_VECTOR.
987   if (Ty.isVector())
988     Val = MIB.buildSplatVector(Ty, Val).getReg(0);
989 
990   return Val;
991 }
992 
993 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
994                                     Register Val, unsigned KnownLen,
995                                     Align Alignment, bool IsVolatile) {
996   auto &MF = *MI.getParent()->getParent();
997   const auto &TLI = *MF.getSubtarget().getTargetLowering();
998   auto &DL = MF.getDataLayout();
999   LLVMContext &C = MF.getFunction().getContext();
1000 
1001   assert(KnownLen != 0 && "Have a zero length memset length!");
1002 
1003   bool DstAlignCanChange = false;
1004   MachineFrameInfo &MFI = MF.getFrameInfo();
1005   bool OptSize = shouldLowerMemFuncForSize(MF);
1006 
1007   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1008   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1009     DstAlignCanChange = true;
1010 
1011   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
1012   std::vector<LLT> MemOps;
1013 
1014   const auto &DstMMO = **MI.memoperands_begin();
1015   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1016 
1017   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1018   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1019 
1020   if (!findGISelOptimalMemOpLowering(MemOps, Limit,
1021                                      MemOp::Set(KnownLen, DstAlignCanChange,
1022                                                 Alignment,
1023                                                 /*IsZeroMemset=*/IsZeroVal,
1024                                                 /*IsVolatile=*/IsVolatile),
1025                                      DstPtrInfo.getAddrSpace(), ~0u,
1026                                      MF.getFunction().getAttributes(), TLI))
1027     return false;
1028 
1029   if (DstAlignCanChange) {
1030     // Get an estimate of the type from the LLT.
1031     Type *IRTy = getTypeForLLT(MemOps[0], C);
1032     Align NewAlign = DL.getABITypeAlign(IRTy);
1033     if (NewAlign > Alignment) {
1034       Alignment = NewAlign;
1035       unsigned FI = FIDef->getOperand(1).getIndex();
1036       // Give the stack frame object a larger alignment if needed.
1037       if (MFI.getObjectAlign(FI) < Alignment)
1038         MFI.setObjectAlignment(FI, Alignment);
1039     }
1040   }
1041 
1042   MachineIRBuilder MIB(MI);
1043   // Find the largest store and generate the bit pattern for it.
1044   LLT LargestTy = MemOps[0];
1045   for (unsigned i = 1; i < MemOps.size(); i++)
1046     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1047       LargestTy = MemOps[i];
1048 
1049   // The memset stored value is always defined as an s8, so in order to make it
1050   // work with larger store types we need to repeat the bit pattern across the
1051   // wider type.
1052   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
1053 
1054   if (!MemSetValue)
1055     return false;
1056 
1057   // Generate the stores. For each store type in the list, we generate the
1058   // matching store of that type to the destination address.
1059   LLT PtrTy = MRI.getType(Dst);
1060   unsigned DstOff = 0;
1061   unsigned Size = KnownLen;
1062   for (unsigned I = 0; I < MemOps.size(); I++) {
1063     LLT Ty = MemOps[I];
1064     unsigned TySize = Ty.getSizeInBytes();
1065     if (TySize > Size) {
1066       // Issuing an unaligned load / store pair that overlaps with the previous
1067       // pair. Adjust the offset accordingly.
1068       assert(I == MemOps.size() - 1 && I != 0);
1069       DstOff -= TySize - Size;
1070     }
1071 
1072     // If this store is smaller than the largest store see whether we can get
1073     // the smaller value for free with a truncate.
1074     Register Value = MemSetValue;
1075     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1076       MVT VT = getMVTForLLT(Ty);
1077       MVT LargestVT = getMVTForLLT(LargestTy);
1078       if (!LargestTy.isVector() && !Ty.isVector() &&
1079           TLI.isTruncateFree(LargestVT, VT))
1080         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1081       else
1082         Value = getMemsetValue(Val, Ty, MIB);
1083       if (!Value)
1084         return false;
1085     }
1086 
1087     auto *StoreMMO =
1088         MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
1089 
1090     Register Ptr = Dst;
1091     if (DstOff != 0) {
1092       auto Offset =
1093           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1094       Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1095     }
1096 
1097     MIB.buildStore(Value, Ptr, *StoreMMO);
1098     DstOff += Ty.getSizeInBytes();
1099     Size -= TySize;
1100   }
1101 
1102   MI.eraseFromParent();
1103   return true;
1104 }
1105 
1106 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1107                                     Register Src, unsigned KnownLen,
1108                                     Align DstAlign, Align SrcAlign,
1109                                     bool IsVolatile) {
1110   auto &MF = *MI.getParent()->getParent();
1111   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1112   auto &DL = MF.getDataLayout();
1113   LLVMContext &C = MF.getFunction().getContext();
1114 
1115   assert(KnownLen != 0 && "Have a zero length memcpy length!");
1116 
1117   bool DstAlignCanChange = false;
1118   MachineFrameInfo &MFI = MF.getFrameInfo();
1119   bool OptSize = shouldLowerMemFuncForSize(MF);
1120   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1121 
1122   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1123   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1124     DstAlignCanChange = true;
1125 
1126   // FIXME: infer better src pointer alignment like SelectionDAG does here.
1127   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1128   // if the memcpy is in a tail call position.
1129 
1130   unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1131   std::vector<LLT> MemOps;
1132 
1133   const auto &DstMMO = **MI.memoperands_begin();
1134   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1135   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1136   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1137 
1138   if (!findGISelOptimalMemOpLowering(
1139           MemOps, Limit,
1140           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1141                       IsVolatile),
1142           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1143           MF.getFunction().getAttributes(), TLI))
1144     return false;
1145 
1146   if (DstAlignCanChange) {
1147     // Get an estimate of the type from the LLT.
1148     Type *IRTy = getTypeForLLT(MemOps[0], C);
1149     Align NewAlign = DL.getABITypeAlign(IRTy);
1150 
1151     // Don't promote to an alignment that would require dynamic stack
1152     // realignment.
1153     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1154     if (!TRI->needsStackRealignment(MF))
1155       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1156         NewAlign = NewAlign / 2;
1157 
1158     if (NewAlign > Alignment) {
1159       Alignment = NewAlign;
1160       unsigned FI = FIDef->getOperand(1).getIndex();
1161       // Give the stack frame object a larger alignment if needed.
1162       if (MFI.getObjectAlign(FI) < Alignment)
1163         MFI.setObjectAlignment(FI, Alignment);
1164     }
1165   }
1166 
1167   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1168 
1169   MachineIRBuilder MIB(MI);
1170   // Now we need to emit a pair of load and stores for each of the types we've
1171   // collected. I.e. for each type, generate a load from the source pointer of
1172   // that type width, and then generate a corresponding store to the dest buffer
1173   // of that value loaded. This can result in a sequence of loads and stores
1174   // mixed types, depending on what the target specifies as good types to use.
1175   unsigned CurrOffset = 0;
1176   LLT PtrTy = MRI.getType(Src);
1177   unsigned Size = KnownLen;
1178   for (auto CopyTy : MemOps) {
1179     // Issuing an unaligned load / store pair  that overlaps with the previous
1180     // pair. Adjust the offset accordingly.
1181     if (CopyTy.getSizeInBytes() > Size)
1182       CurrOffset -= CopyTy.getSizeInBytes() - Size;
1183 
1184     // Construct MMOs for the accesses.
1185     auto *LoadMMO =
1186         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1187     auto *StoreMMO =
1188         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1189 
1190     // Create the load.
1191     Register LoadPtr = Src;
1192     Register Offset;
1193     if (CurrOffset != 0) {
1194       Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1195                    .getReg(0);
1196       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1197     }
1198     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1199 
1200     // Create the store.
1201     Register StorePtr =
1202         CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1203     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1204     CurrOffset += CopyTy.getSizeInBytes();
1205     Size -= CopyTy.getSizeInBytes();
1206   }
1207 
1208   MI.eraseFromParent();
1209   return true;
1210 }
1211 
1212 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1213                                      Register Src, unsigned KnownLen,
1214                                      Align DstAlign, Align SrcAlign,
1215                                      bool IsVolatile) {
1216   auto &MF = *MI.getParent()->getParent();
1217   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1218   auto &DL = MF.getDataLayout();
1219   LLVMContext &C = MF.getFunction().getContext();
1220 
1221   assert(KnownLen != 0 && "Have a zero length memmove length!");
1222 
1223   bool DstAlignCanChange = false;
1224   MachineFrameInfo &MFI = MF.getFrameInfo();
1225   bool OptSize = shouldLowerMemFuncForSize(MF);
1226   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1227 
1228   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1229   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1230     DstAlignCanChange = true;
1231 
1232   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1233   std::vector<LLT> MemOps;
1234 
1235   const auto &DstMMO = **MI.memoperands_begin();
1236   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1237   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1238   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1239 
1240   // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
1241   // to a bug in it's findOptimalMemOpLowering implementation. For now do the
1242   // same thing here.
1243   if (!findGISelOptimalMemOpLowering(
1244           MemOps, Limit,
1245           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1246                       /*IsVolatile*/ true),
1247           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1248           MF.getFunction().getAttributes(), TLI))
1249     return false;
1250 
1251   if (DstAlignCanChange) {
1252     // Get an estimate of the type from the LLT.
1253     Type *IRTy = getTypeForLLT(MemOps[0], C);
1254     Align NewAlign = DL.getABITypeAlign(IRTy);
1255 
1256     // Don't promote to an alignment that would require dynamic stack
1257     // realignment.
1258     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1259     if (!TRI->needsStackRealignment(MF))
1260       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1261         NewAlign = NewAlign / 2;
1262 
1263     if (NewAlign > Alignment) {
1264       Alignment = NewAlign;
1265       unsigned FI = FIDef->getOperand(1).getIndex();
1266       // Give the stack frame object a larger alignment if needed.
1267       if (MFI.getObjectAlign(FI) < Alignment)
1268         MFI.setObjectAlignment(FI, Alignment);
1269     }
1270   }
1271 
1272   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1273 
1274   MachineIRBuilder MIB(MI);
1275   // Memmove requires that we perform the loads first before issuing the stores.
1276   // Apart from that, this loop is pretty much doing the same thing as the
1277   // memcpy codegen function.
1278   unsigned CurrOffset = 0;
1279   LLT PtrTy = MRI.getType(Src);
1280   SmallVector<Register, 16> LoadVals;
1281   for (auto CopyTy : MemOps) {
1282     // Construct MMO for the load.
1283     auto *LoadMMO =
1284         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1285 
1286     // Create the load.
1287     Register LoadPtr = Src;
1288     if (CurrOffset != 0) {
1289       auto Offset =
1290           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1291       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1292     }
1293     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1294     CurrOffset += CopyTy.getSizeInBytes();
1295   }
1296 
1297   CurrOffset = 0;
1298   for (unsigned I = 0; I < MemOps.size(); ++I) {
1299     LLT CopyTy = MemOps[I];
1300     // Now store the values loaded.
1301     auto *StoreMMO =
1302         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1303 
1304     Register StorePtr = Dst;
1305     if (CurrOffset != 0) {
1306       auto Offset =
1307           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1308       StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1309     }
1310     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1311     CurrOffset += CopyTy.getSizeInBytes();
1312   }
1313   MI.eraseFromParent();
1314   return true;
1315 }
1316 
1317 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1318   // This combine is fairly complex so it's not written with a separate
1319   // matcher function.
1320   assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1321   Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
1322   assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
1323           ID == Intrinsic::memset) &&
1324          "Expected a memcpy like intrinsic");
1325 
1326   auto MMOIt = MI.memoperands_begin();
1327   const MachineMemOperand *MemOp = *MMOIt;
1328   bool IsVolatile = MemOp->isVolatile();
1329   // Don't try to optimize volatile.
1330   if (IsVolatile)
1331     return false;
1332 
1333   Align DstAlign = MemOp->getBaseAlign();
1334   Align SrcAlign;
1335   Register Dst = MI.getOperand(1).getReg();
1336   Register Src = MI.getOperand(2).getReg();
1337   Register Len = MI.getOperand(3).getReg();
1338 
1339   if (ID != Intrinsic::memset) {
1340     assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1341     MemOp = *(++MMOIt);
1342     SrcAlign = MemOp->getBaseAlign();
1343   }
1344 
1345   // See if this is a constant length copy
1346   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1347   if (!LenVRegAndVal)
1348     return false; // Leave it to the legalizer to lower it to a libcall.
1349   unsigned KnownLen = LenVRegAndVal->Value;
1350 
1351   if (KnownLen == 0) {
1352     MI.eraseFromParent();
1353     return true;
1354   }
1355 
1356   if (MaxLen && KnownLen > MaxLen)
1357     return false;
1358 
1359   if (ID == Intrinsic::memcpy)
1360     return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1361   if (ID == Intrinsic::memmove)
1362     return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1363   if (ID == Intrinsic::memset)
1364     return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1365   return false;
1366 }
1367 
1368 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1369                                            PtrAddChain &MatchInfo) {
1370   // We're trying to match the following pattern:
1371   //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1372   //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
1373   // -->
1374   //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1375 
1376   if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1377     return false;
1378 
1379   Register Add2 = MI.getOperand(1).getReg();
1380   Register Imm1 = MI.getOperand(2).getReg();
1381   auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1382   if (!MaybeImmVal)
1383     return false;
1384 
1385   MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
1386   if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1387     return false;
1388 
1389   Register Base = Add2Def->getOperand(1).getReg();
1390   Register Imm2 = Add2Def->getOperand(2).getReg();
1391   auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1392   if (!MaybeImm2Val)
1393     return false;
1394 
1395   // Pass the combined immediate to the apply function.
1396   MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
1397   MatchInfo.Base = Base;
1398   return true;
1399 }
1400 
1401 bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1402                                            PtrAddChain &MatchInfo) {
1403   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1404   MachineIRBuilder MIB(MI);
1405   LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1406   auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1407   Observer.changingInstr(MI);
1408   MI.getOperand(1).setReg(MatchInfo.Base);
1409   MI.getOperand(2).setReg(NewOffset.getReg(0));
1410   Observer.changedInstr(MI);
1411   return true;
1412 }
1413 
1414 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1415                                           unsigned &ShiftVal) {
1416   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1417   auto MaybeImmVal =
1418       getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1419   if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
1420     return false;
1421   ShiftVal = Log2_64(MaybeImmVal->Value);
1422   return true;
1423 }
1424 
1425 bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1426                                           unsigned &ShiftVal) {
1427   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1428   MachineIRBuilder MIB(MI);
1429   LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1430   auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1431   Observer.changingInstr(MI);
1432   MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1433   MI.getOperand(2).setReg(ShiftCst.getReg(0));
1434   Observer.changedInstr(MI);
1435   return true;
1436 }
1437 
1438 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
1439                                                 unsigned TargetShiftSize,
1440                                                 unsigned &ShiftVal) {
1441   assert((MI.getOpcode() == TargetOpcode::G_SHL ||
1442           MI.getOpcode() == TargetOpcode::G_LSHR ||
1443           MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
1444 
1445   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1446   if (Ty.isVector()) // TODO:
1447     return false;
1448 
1449   // Don't narrow further than the requested size.
1450   unsigned Size = Ty.getSizeInBits();
1451   if (Size <= TargetShiftSize)
1452     return false;
1453 
1454   auto MaybeImmVal =
1455     getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1456   if (!MaybeImmVal)
1457     return false;
1458 
1459   ShiftVal = MaybeImmVal->Value;
1460   return ShiftVal >= Size / 2 && ShiftVal < Size;
1461 }
1462 
1463 bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
1464                                                 const unsigned &ShiftVal) {
1465   Register DstReg = MI.getOperand(0).getReg();
1466   Register SrcReg = MI.getOperand(1).getReg();
1467   LLT Ty = MRI.getType(SrcReg);
1468   unsigned Size = Ty.getSizeInBits();
1469   unsigned HalfSize = Size / 2;
1470   assert(ShiftVal >= HalfSize);
1471 
1472   LLT HalfTy = LLT::scalar(HalfSize);
1473 
1474   Builder.setInstr(MI);
1475   auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
1476   unsigned NarrowShiftAmt = ShiftVal - HalfSize;
1477 
1478   if (MI.getOpcode() == TargetOpcode::G_LSHR) {
1479     Register Narrowed = Unmerge.getReg(1);
1480 
1481     //  dst = G_LSHR s64:x, C for C >= 32
1482     // =>
1483     //   lo, hi = G_UNMERGE_VALUES x
1484     //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
1485 
1486     if (NarrowShiftAmt != 0) {
1487       Narrowed = Builder.buildLShr(HalfTy, Narrowed,
1488         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
1489     }
1490 
1491     auto Zero = Builder.buildConstant(HalfTy, 0);
1492     Builder.buildMerge(DstReg, { Narrowed, Zero });
1493   } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
1494     Register Narrowed = Unmerge.getReg(0);
1495     //  dst = G_SHL s64:x, C for C >= 32
1496     // =>
1497     //   lo, hi = G_UNMERGE_VALUES x
1498     //   dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
1499     if (NarrowShiftAmt != 0) {
1500       Narrowed = Builder.buildShl(HalfTy, Narrowed,
1501         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
1502     }
1503 
1504     auto Zero = Builder.buildConstant(HalfTy, 0);
1505     Builder.buildMerge(DstReg, { Zero, Narrowed });
1506   } else {
1507     assert(MI.getOpcode() == TargetOpcode::G_ASHR);
1508     auto Hi = Builder.buildAShr(
1509       HalfTy, Unmerge.getReg(1),
1510       Builder.buildConstant(HalfTy, HalfSize - 1));
1511 
1512     if (ShiftVal == HalfSize) {
1513       // (G_ASHR i64:x, 32) ->
1514       //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
1515       Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
1516     } else if (ShiftVal == Size - 1) {
1517       // Don't need a second shift.
1518       // (G_ASHR i64:x, 63) ->
1519       //   %narrowed = (G_ASHR hi_32(x), 31)
1520       //   G_MERGE_VALUES %narrowed, %narrowed
1521       Builder.buildMerge(DstReg, { Hi, Hi });
1522     } else {
1523       auto Lo = Builder.buildAShr(
1524         HalfTy, Unmerge.getReg(1),
1525         Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
1526 
1527       // (G_ASHR i64:x, C) ->, for C >= 32
1528       //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
1529       Builder.buildMerge(DstReg, { Lo, Hi });
1530     }
1531   }
1532 
1533   MI.eraseFromParent();
1534   return true;
1535 }
1536 
1537 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
1538                                               unsigned TargetShiftAmount) {
1539   unsigned ShiftAmt;
1540   if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
1541     applyCombineShiftToUnmerge(MI, ShiftAmt);
1542     return true;
1543   }
1544 
1545   return false;
1546 }
1547 
1548 bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
1549   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
1550   Register DstReg = MI.getOperand(0).getReg();
1551   LLT DstTy = MRI.getType(DstReg);
1552   Register SrcReg = MI.getOperand(1).getReg();
1553   return mi_match(SrcReg, MRI,
1554                   m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
1555 }
1556 
1557 bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
1558   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
1559   Register DstReg = MI.getOperand(0).getReg();
1560   Builder.setInstr(MI);
1561   Builder.buildCopy(DstReg, Reg);
1562   MI.eraseFromParent();
1563   return true;
1564 }
1565 
1566 bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
1567   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
1568   Register SrcReg = MI.getOperand(1).getReg();
1569   return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
1570 }
1571 
1572 bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
1573   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
1574   Register DstReg = MI.getOperand(0).getReg();
1575   Builder.setInstr(MI);
1576   Builder.buildZExtOrTrunc(DstReg, Reg);
1577   MI.eraseFromParent();
1578   return true;
1579 }
1580 
1581 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
1582   return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1583     return MO.isReg() &&
1584            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
1585   });
1586 }
1587 
1588 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
1589   return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1590     return !MO.isReg() ||
1591            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
1592   });
1593 }
1594 
1595 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
1596   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
1597   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
1598   return all_of(Mask, [](int Elt) { return Elt < 0; });
1599 }
1600 
1601 bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
1602   assert(MI.getOpcode() == TargetOpcode::G_STORE);
1603   return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
1604                       MRI);
1605 }
1606 
1607 bool CombinerHelper::eraseInst(MachineInstr &MI) {
1608   MI.eraseFromParent();
1609   return true;
1610 }
1611 
1612 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
1613                                     const MachineOperand &MOP2) {
1614   if (!MOP1.isReg() || !MOP2.isReg())
1615     return false;
1616   MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
1617   if (!I1)
1618     return false;
1619   MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
1620   if (!I2)
1621     return false;
1622 
1623   // Handle a case like this:
1624   //
1625   // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
1626   //
1627   // Even though %0 and %1 are produced by the same instruction they are not
1628   // the same values.
1629   if (I1 == I2)
1630     return MOP1.getReg() == MOP2.getReg();
1631 
1632   // If we have an instruction which loads or stores, we can't guarantee that
1633   // it is identical.
1634   //
1635   // For example, we may have
1636   //
1637   // %x1 = G_LOAD %addr (load N from @somewhere)
1638   // ...
1639   // call @foo
1640   // ...
1641   // %x2 = G_LOAD %addr (load N from @somewhere)
1642   // ...
1643   // %or = G_OR %x1, %x2
1644   //
1645   // It's possible that @foo will modify whatever lives at the address we're
1646   // loading from. To be safe, let's just assume that all loads and stores
1647   // are different (unless we have something which is guaranteed to not
1648   // change.)
1649   if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
1650     return false;
1651 
1652   // Check for physical registers on the instructions first to avoid cases
1653   // like this:
1654   //
1655   // %a = COPY $physreg
1656   // ...
1657   // SOMETHING implicit-def $physreg
1658   // ...
1659   // %b = COPY $physreg
1660   //
1661   // These copies are not equivalent.
1662   if (any_of(I1->uses(), [](const MachineOperand &MO) {
1663         return MO.isReg() && MO.getReg().isPhysical();
1664       })) {
1665     // Check if we have a case like this:
1666     //
1667     // %a = COPY $physreg
1668     // %b = COPY %a
1669     //
1670     // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
1671     // From that, we know that they must have the same value, since they must
1672     // have come from the same COPY.
1673     return I1->isIdenticalTo(*I2);
1674   }
1675 
1676   // We don't have any physical registers, so we don't necessarily need the
1677   // same vreg defs.
1678   //
1679   // On the off-chance that there's some target instruction feeding into the
1680   // instruction, let's use produceSameValue instead of isIdenticalTo.
1681   return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
1682 }
1683 
1684 bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
1685   if (!MOP.isReg())
1686     return false;
1687   // MIPatternMatch doesn't let us look through G_ZEXT etc.
1688   auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
1689   return ValAndVReg && ValAndVReg->Value == C;
1690 }
1691 
1692 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
1693                                                      unsigned OpIdx) {
1694   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
1695   Register OldReg = MI.getOperand(0).getReg();
1696   Register Replacement = MI.getOperand(OpIdx).getReg();
1697   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
1698   MI.eraseFromParent();
1699   replaceRegWith(MRI, OldReg, Replacement);
1700   return true;
1701 }
1702 
1703 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
1704   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
1705   // Match (cond ? x : x)
1706   return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
1707          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
1708                        MRI);
1709 }
1710 
1711 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
1712   return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
1713          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
1714                        MRI);
1715 }
1716 
1717 bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
1718   return matchConstantOp(MI.getOperand(OpIdx), 0) &&
1719          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
1720                        MRI);
1721 }
1722 
1723 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
1724   assert(MI.getNumDefs() == 1 && "Expected only one def?");
1725   Builder.setInstr(MI);
1726   Builder.buildFConstant(MI.getOperand(0), C);
1727   MI.eraseFromParent();
1728   return true;
1729 }
1730 
1731 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
1732   assert(MI.getNumDefs() == 1 && "Expected only one def?");
1733   Builder.setInstr(MI);
1734   Builder.buildConstant(MI.getOperand(0), C);
1735   MI.eraseFromParent();
1736   return true;
1737 }
1738 
1739 bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
1740   assert(MI.getNumDefs() == 1 && "Expected only one def?");
1741   Builder.setInstr(MI);
1742   Builder.buildUndef(MI.getOperand(0));
1743   MI.eraseFromParent();
1744   return true;
1745 }
1746 
1747 bool CombinerHelper::matchSimplifyAddToSub(
1748     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
1749   Register LHS = MI.getOperand(1).getReg();
1750   Register RHS = MI.getOperand(2).getReg();
1751   Register &NewLHS = std::get<0>(MatchInfo);
1752   Register &NewRHS = std::get<1>(MatchInfo);
1753 
1754   // Helper lambda to check for opportunities for
1755   // ((0-A) + B) -> B - A
1756   // (A + (0-B)) -> A - B
1757   auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
1758     int64_t Cst;
1759     if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
1760         Cst != 0)
1761       return false;
1762     NewLHS = MaybeNewLHS;
1763     return true;
1764   };
1765 
1766   return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
1767 }
1768 
1769 bool CombinerHelper::applySimplifyAddToSub(
1770     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
1771   Builder.setInstr(MI);
1772   Register SubLHS, SubRHS;
1773   std::tie(SubLHS, SubRHS) = MatchInfo;
1774   Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
1775   MI.eraseFromParent();
1776   return true;
1777 }
1778 
1779 bool CombinerHelper::tryCombine(MachineInstr &MI) {
1780   if (tryCombineCopy(MI))
1781     return true;
1782   if (tryCombineExtendingLoads(MI))
1783     return true;
1784   if (tryCombineIndexedLoadStore(MI))
1785     return true;
1786   return false;
1787 }
1788