//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/CodeGen/GlobalISel/Combiner.h"
10 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
12 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
13 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
14 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
15 #include "llvm/CodeGen/GlobalISel/Utils.h"
16 #include "llvm/CodeGen/MachineDominators.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineInstr.h"
19 #include "llvm/CodeGen/MachineMemOperand.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Target/TargetMachine.h"
25 
26 #define DEBUG_TYPE "gi-combiner"
27 
28 using namespace llvm;
29 using namespace MIPatternMatch;
30 
31 // Option to allow testing of the combiner while no targets know about indexed
32 // addressing.
33 static cl::opt<bool>
34     ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
35                        cl::desc("Force all indexed operations to be "
36                                 "legal for the GlobalISel combiner"));
37 
38 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
39                                MachineIRBuilder &B, GISelKnownBits *KB,
40                                MachineDominatorTree *MDT,
41                                const LegalizerInfo *LI)
42     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
43       KB(KB), MDT(MDT), LI(LI) {
44   (void)this->KB;
45 }
46 
47 const TargetLowering &CombinerHelper::getTargetLowering() const {
48   return *Builder.getMF().getSubtarget().getTargetLowering();
49 }
50 
51 bool CombinerHelper::isLegalOrBeforeLegalizer(
52     const LegalityQuery &Query) const {
53   return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
54 }
55 
56 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
57                                     Register ToReg) const {
58   Observer.changingAllUsesOfReg(MRI, FromReg);
59 
60   if (MRI.constrainRegAttrs(ToReg, FromReg))
61     MRI.replaceRegWith(FromReg, ToReg);
62   else
63     Builder.buildCopy(ToReg, FromReg);
64 
65   Observer.finishedChangingAllUsesOfReg();
66 }
67 
68 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
69                                       MachineOperand &FromRegOp,
70                                       Register ToReg) const {
71   assert(FromRegOp.getParent() && "Expected an operand in an MI");
72   Observer.changingInstr(*FromRegOp.getParent());
73 
74   FromRegOp.setReg(ToReg);
75 
76   Observer.changedInstr(*FromRegOp.getParent());
77 }
78 
79 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
80   if (matchCombineCopy(MI)) {
81     applyCombineCopy(MI);
82     return true;
83   }
84   return false;
85 }
86 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
87   if (MI.getOpcode() != TargetOpcode::COPY)
88     return false;
89   Register DstReg = MI.getOperand(0).getReg();
90   Register SrcReg = MI.getOperand(1).getReg();
91   return canReplaceReg(DstReg, SrcReg, MRI);
92 }
93 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
94   Register DstReg = MI.getOperand(0).getReg();
95   Register SrcReg = MI.getOperand(1).getReg();
96   MI.eraseFromParent();
97   replaceRegWith(MRI, DstReg, SrcReg);
98 }
99 
100 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
101   bool IsUndef = false;
102   SmallVector<Register, 4> Ops;
103   if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
104     applyCombineConcatVectors(MI, IsUndef, Ops);
105     return true;
106   }
107   return false;
108 }
109 
110 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
111                                                SmallVectorImpl<Register> &Ops) {
112   assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
113          "Invalid instruction");
114   IsUndef = true;
115   MachineInstr *Undef = nullptr;
116 
  // Walk over all the operands of the concat_vectors and check if they are
  // build_vectors themselves or undef.
  // Then collect their operands in Ops.
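  //
  // As an illustrative sketch (register names are made up, not from a real
  // test case):
  //   %a:_(<2 x s32>) = G_BUILD_VECTOR %x(s32), %y(s32)
  //   %b:_(<2 x s32>) = G_IMPLICIT_DEF
  //   %c:_(<4 x s32>) = G_CONCAT_VECTORS %a(<2 x s32>), %b(<2 x s32>)
  // would collect Ops = {%x, %y, %undef, %undef}, allowing the concat to be
  // rewritten later as a single flattened G_BUILD_VECTOR.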
120   for (const MachineOperand &MO : MI.uses()) {
121     Register Reg = MO.getReg();
122     MachineInstr *Def = MRI.getVRegDef(Reg);
123     assert(Def && "Operand not defined");
124     switch (Def->getOpcode()) {
125     case TargetOpcode::G_BUILD_VECTOR:
126       IsUndef = false;
127       // Remember the operands of the build_vector to fold
128       // them into the yet-to-build flattened concat vectors.
129       for (const MachineOperand &BuildVecMO : Def->uses())
130         Ops.push_back(BuildVecMO.getReg());
131       break;
132     case TargetOpcode::G_IMPLICIT_DEF: {
133       LLT OpType = MRI.getType(Reg);
134       // Keep one undef value for all the undef operands.
135       if (!Undef) {
136         Builder.setInsertPt(*MI.getParent(), MI);
137         Undef = Builder.buildUndef(OpType.getScalarType());
138       }
139       assert(MRI.getType(Undef->getOperand(0).getReg()) ==
140                  OpType.getScalarType() &&
141              "All undefs should have the same type");
142       // Break the undef vector in as many scalar elements as needed
143       // for the flattening.
144       for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
145            EltIdx != EltEnd; ++EltIdx)
146         Ops.push_back(Undef->getOperand(0).getReg());
147       break;
148     }
149     default:
150       return false;
151     }
152   }
153   return true;
154 }
155 void CombinerHelper::applyCombineConcatVectors(
156     MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
158   // Generate the flattened build_vector.
159   Register DstReg = MI.getOperand(0).getReg();
160   Builder.setInsertPt(*MI.getParent(), MI);
161   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
162 
  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef.  Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up.  For now, given we already gather this information
  // in matchCombineConcatVectors, just save compile time and issue the
  // right thing.
169   if (IsUndef)
170     Builder.buildUndef(NewDstReg);
171   else
172     Builder.buildBuildVector(NewDstReg, Ops);
173   MI.eraseFromParent();
174   replaceRegWith(MRI, DstReg, NewDstReg);
175 }
176 
177 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
178   SmallVector<Register, 4> Ops;
179   if (matchCombineShuffleVector(MI, Ops)) {
180     applyCombineShuffleVector(MI, Ops);
181     return true;
182   }
183   return false;
184 }
185 
186 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
187                                                SmallVectorImpl<Register> &Ops) {
188   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
189          "Invalid instruction kind");
190   LLT DstType = MRI.getType(MI.getOperand(0).getReg());
191   Register Src1 = MI.getOperand(1).getReg();
192   LLT SrcType = MRI.getType(Src1);
  // As bizarre as it may look, shuffle vector can actually produce a
  // scalar! This is because at the IR level a <1 x ty> shuffle
  // vector is perfectly valid.
196   unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
197   unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
198 
  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to replace the
  // shuffle vector with a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  //       extract_vector_elt and so on. It is less clear that would
  //       be better though, so don't bother for now.
  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter. We will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size. But
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match
  //       we could still emit an extract vector element in that case.
214   if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
215     return false;
216 
217   // Check that the shuffle mask can be broken evenly between the
218   // different sources.
219   if (DstNumElts % SrcNumElts != 0)
220     return false;
221 
222   // Mask length is a multiple of the source vector length.
223   // Check if the shuffle is some kind of concatenation of the input
224   // vectors.
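  //
  // Illustrative sketch (values are made up): with <2 x s32> sources and a
  // <4 x s32> destination, the mask <0, 1, 2, 3> maps the first two result
  // elements to Src1 and the last two to Src2, so ConcatSrcs becomes {0, 1}
  // and the shuffle is recognized as a plain concatenation.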
225   unsigned NumConcat = DstNumElts / SrcNumElts;
226   SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
227   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
228   for (unsigned i = 0; i != DstNumElts; ++i) {
229     int Idx = Mask[i];
230     // Undef value.
231     if (Idx < 0)
232       continue;
233     // Ensure the indices in each SrcType sized piece are sequential and that
234     // the same source is used for the whole piece.
235     if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
236         (ConcatSrcs[i / SrcNumElts] >= 0 &&
237          ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
238       return false;
239     // Remember which source this index came from.
240     ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
241   }
242 
243   // The shuffle is concatenating multiple vectors together.
244   // Collect the different operands for that.
245   Register UndefReg;
246   Register Src2 = MI.getOperand(2).getReg();
247   for (auto Src : ConcatSrcs) {
248     if (Src < 0) {
249       if (!UndefReg) {
250         Builder.setInsertPt(*MI.getParent(), MI);
251         UndefReg = Builder.buildUndef(SrcType).getReg(0);
252       }
253       Ops.push_back(UndefReg);
254     } else if (Src == 0)
255       Ops.push_back(Src1);
256     else
257       Ops.push_back(Src2);
258   }
259   return true;
260 }
261 
262 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
263                                                const ArrayRef<Register> Ops) {
264   Register DstReg = MI.getOperand(0).getReg();
265   Builder.setInsertPt(*MI.getParent(), MI);
266   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
267 
268   if (Ops.size() == 1)
269     Builder.buildCopy(NewDstReg, Ops[0]);
270   else
271     Builder.buildMerge(NewDstReg, Ops);
272 
273   MI.eraseFromParent();
274   replaceRegWith(MRI, DstReg, NewDstReg);
275 }
276 
277 namespace {
278 
279 /// Select a preference between two uses. CurrentUse is the current preference
280 /// while *ForCandidate is attributes of the candidate under consideration.
281 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
282                                   const LLT TyForCandidate,
283                                   unsigned OpcodeForCandidate,
284                                   MachineInstr *MIForCandidate) {
285   if (!CurrentUse.Ty.isValid()) {
286     if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
287         CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
288       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
289     return CurrentUse;
290   }
291 
292   // We permit the extend to hoist through basic blocks but this is only
293   // sensible if the target has extending loads. If you end up lowering back
294   // into a load and extend during the legalizer then the end result is
295   // hoisting the extend up to the load.
296 
297   // Prefer defined extensions to undefined extensions as these are more
298   // likely to reduce the number of instructions.
299   if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
300       CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
301     return CurrentUse;
302   else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
303            OpcodeForCandidate != TargetOpcode::G_ANYEXT)
304     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
305 
  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive, so folding them into the load is the bigger win. Any
  // remaining zero extension is comparatively cheap.
308   if (CurrentUse.Ty == TyForCandidate) {
309     if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
310         OpcodeForCandidate == TargetOpcode::G_ZEXT)
311       return CurrentUse;
312     else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
313              OpcodeForCandidate == TargetOpcode::G_SEXT)
314       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
315   }
316 
317   // This is potentially target specific. We've chosen the largest type
318   // because G_TRUNC is usually free. One potential catch with this is that
319   // some targets have a reduced number of larger registers than smaller
320   // registers and this choice potentially increases the live-range for the
321   // larger value.
322   if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
323     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
324   }
325   return CurrentUse;
326 }
327 
328 /// Find a suitable place to insert some instructions and insert them. This
329 /// function accounts for special cases like inserting before a PHI node.
/// The current strategy for inserting before PHIs is to duplicate the
331 /// instructions for each predecessor. However, while that's ok for G_TRUNC
332 /// on most targets since it generally requires no code, other targets/cases may
333 /// want to try harder to find a dominating block.
334 static void InsertInsnsWithoutSideEffectsBeforeUse(
335     MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
336     std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
337                        MachineOperand &UseMO)>
338         Inserter) {
339   MachineInstr &UseMI = *UseMO.getParent();
340 
341   MachineBasicBlock *InsertBB = UseMI.getParent();
342 
343   // If the use is a PHI then we want the predecessor block instead.
344   if (UseMI.isPHI()) {
345     MachineOperand *PredBB = std::next(&UseMO);
346     InsertBB = PredBB->getMBB();
347   }
348 
349   // If the block is the same block as the def then we want to insert just after
350   // the def instead of at the start of the block.
351   if (InsertBB == DefMI.getParent()) {
352     MachineBasicBlock::iterator InsertPt = &DefMI;
353     Inserter(InsertBB, std::next(InsertPt), UseMO);
354     return;
355   }
356 
357   // Otherwise we want the start of the BB
358   Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
359 }
360 } // end anonymous namespace
361 
362 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
363   PreferredTuple Preferred;
364   if (matchCombineExtendingLoads(MI, Preferred)) {
365     applyCombineExtendingLoads(MI, Preferred);
366     return true;
367   }
368   return false;
369 }
370 
371 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
372                                                 PreferredTuple &Preferred) {
373   // We match the loads and follow the uses to the extend instead of matching
374   // the extends and following the def to the load. This is because the load
375   // must remain in the same position for correctness (unless we also add code
376   // to find a safe place to sink it) whereas the extend is freely movable.
377   // It also prevents us from duplicating the load for the volatile case or just
378   // for performance.
379 
380   if (MI.getOpcode() != TargetOpcode::G_LOAD &&
381       MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
382       MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
383     return false;
384 
385   auto &LoadValue = MI.getOperand(0);
386   assert(LoadValue.isReg() && "Result wasn't a register?");
387 
388   LLT LoadValueTy = MRI.getType(LoadValue.getReg());
389   if (!LoadValueTy.isScalar())
390     return false;
391 
392   // Most architectures are going to legalize <s8 loads into at least a 1 byte
393   // load, and the MMOs can only describe memory accesses in multiples of bytes.
394   // If we try to perform extload combining on those, we can end up with
395   // %a(s8) = extload %ptr (load 1 byte from %ptr)
396   // ... which is an illegal extload instruction.
397   if (LoadValueTy.getSizeInBits() < 8)
398     return false;
399 
400   // For non power-of-2 types, they will very likely be legalized into multiple
401   // loads. Don't bother trying to match them into extending loads.
402   if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
403     return false;
404 
  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
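  //
  // A rough sketch of the selection (illustrative registers, assuming the
  // extending load is legal for the target or we are pre-legalizer): given
  //   %v:_(s16) = G_LOAD %ptr
  //   %a:_(s32) = G_SEXT %v(s16)
  //   %b:_(s64) = G_ANYEXT %v(s16)
  // the defined G_SEXT is preferred over the G_ANYEXT, so the load becomes a
  // G_SEXTLOAD producing s32 and %b is later rewritten to extend from that
  // s32 result.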
410   unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
411                                  ? TargetOpcode::G_ANYEXT
412                                  : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
413                                        ? TargetOpcode::G_SEXT
414                                        : TargetOpcode::G_ZEXT;
415   Preferred = {LLT(), PreferredOpcode, nullptr};
416   for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
417     if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
418         UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
419         (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
420       // Check for legality.
421       if (LI) {
422         LegalityQuery::MemDesc MMDesc;
423         const auto &MMO = **MI.memoperands_begin();
424         MMDesc.SizeInBits = MMO.getSizeInBits();
425         MMDesc.AlignInBits = MMO.getAlign().value() * 8;
426         MMDesc.Ordering = MMO.getOrdering();
427         LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
428         LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
429         if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
430             LegalizeActions::Legal)
431           continue;
432       }
433       Preferred = ChoosePreferredUse(Preferred,
434                                      MRI.getType(UseMI.getOperand(0).getReg()),
435                                      UseMI.getOpcode(), &UseMI);
436     }
437   }
438 
439   // There were no extends
440   if (!Preferred.MI)
441     return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
444   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
445 
446   LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
447   return true;
448 }
449 
450 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
451                                                 PreferredTuple &Preferred) {
452   // Rewrite the load to the chosen extending load.
453   Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
454 
455   // Inserter to insert a truncate back to the original type at a given point
456   // with some basic CSE to limit truncate duplication to one per BB.
457   DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
458   auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
459                            MachineBasicBlock::iterator InsertBefore,
460                            MachineOperand &UseMO) {
461     MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
462     if (PreviouslyEmitted) {
463       Observer.changingInstr(*UseMO.getParent());
464       UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
465       Observer.changedInstr(*UseMO.getParent());
466       return;
467     }
468 
469     Builder.setInsertPt(*InsertIntoBB, InsertBefore);
470     Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
471     MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
472     EmittedInsns[InsertIntoBB] = NewMI;
473     replaceRegOpWith(MRI, UseMO, NewDstReg);
474   };
475 
476   Observer.changingInstr(MI);
477   MI.setDesc(
478       Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
479                                ? TargetOpcode::G_SEXTLOAD
480                                : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
481                                      ? TargetOpcode::G_ZEXTLOAD
482                                      : TargetOpcode::G_LOAD));
483 
484   // Rewrite all the uses to fix up the types.
485   auto &LoadValue = MI.getOperand(0);
486   SmallVector<MachineOperand *, 4> Uses;
487   for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
488     Uses.push_back(&UseMO);
489 
490   for (auto *UseMO : Uses) {
491     MachineInstr *UseMI = UseMO->getParent();
492 
493     // If the extend is compatible with the preferred extend then we should fix
494     // up the type and extend so that it uses the preferred use.
495     if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
496         UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
497       Register UseDstReg = UseMI->getOperand(0).getReg();
498       MachineOperand &UseSrcMO = UseMI->getOperand(1);
499       const LLT UseDstTy = MRI.getType(UseDstReg);
500       if (UseDstReg != ChosenDstReg) {
501         if (Preferred.Ty == UseDstTy) {
502           // If the use has the same type as the preferred use, then merge
503           // the vregs and erase the extend. For example:
504           //    %1:_(s8) = G_LOAD ...
505           //    %2:_(s32) = G_SEXT %1(s8)
506           //    %3:_(s32) = G_ANYEXT %1(s8)
507           //    ... = ... %3(s32)
508           // rewrites to:
509           //    %2:_(s32) = G_SEXTLOAD ...
510           //    ... = ... %2(s32)
511           replaceRegWith(MRI, UseDstReg, ChosenDstReg);
512           Observer.erasingInstr(*UseMO->getParent());
513           UseMO->getParent()->eraseFromParent();
514         } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
515           // If the preferred size is smaller, then keep the extend but extend
516           // from the result of the extending load. For example:
517           //    %1:_(s8) = G_LOAD ...
518           //    %2:_(s32) = G_SEXT %1(s8)
519           //    %3:_(s64) = G_ANYEXT %1(s8)
520           //    ... = ... %3(s64)
          // rewrites to:
522           //    %2:_(s32) = G_SEXTLOAD ...
523           //    %3:_(s64) = G_ANYEXT %2:_(s32)
524           //    ... = ... %3(s64)
525           replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
526         } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          //    ... = ... %3(s32)
538           InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
539                                                  InsertTruncAt);
540         }
541         continue;
542       }
543       // The use is (one of) the uses of the preferred use we chose earlier.
544       // We're going to update the load to def this value later so just erase
545       // the old extend.
546       Observer.erasingInstr(*UseMO->getParent());
547       UseMO->getParent()->eraseFromParent();
548       continue;
549     }
550 
551     // The use isn't an extend. Truncate back to the type we originally loaded.
552     // This is free on many targets.
553     InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
554   }
555 
556   MI.getOperand(0).setReg(ChosenDstReg);
557   Observer.changedInstr(MI);
558 }
559 
560 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
561                                    const MachineInstr &UseMI) {
562   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
563          "shouldn't consider debug uses");
564   assert(DefMI.getParent() == UseMI.getParent());
565   if (&DefMI == &UseMI)
566     return false;
567 
  // Loop through the basic block until we find one of the instructions.
  MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
  for (; &*I != &DefMI && &*I != &UseMI; ++I)
    ;
  // Whichever of the two instructions we reached first comes earlier in the
  // block, so DefMI is a predecessor iff we stopped on it.
  return &*I == &DefMI;
574 }
575 
576 bool CombinerHelper::dominates(const MachineInstr &DefMI,
577                                const MachineInstr &UseMI) {
578   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
579          "shouldn't consider debug uses");
580   if (MDT)
581     return MDT->dominates(&DefMI, &UseMI);
582   else if (DefMI.getParent() != UseMI.getParent())
583     return false;
584 
585   return isPredecessor(DefMI, UseMI);
586 }
587 
588 bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
589   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
590   Register SrcReg = MI.getOperand(1).getReg();
591   Register LoadUser = SrcReg;
592 
593   if (MRI.getType(SrcReg).isVector())
594     return false;
595 
596   Register TruncSrc;
597   if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
598     LoadUser = TruncSrc;
599 
600   uint64_t SizeInBits = MI.getOperand(2).getImm();
601   // If the source is a G_SEXTLOAD from the same bit width, then we don't
602   // need any extend at all, just a truncate.
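  // For instance (illustrative):
  //   %ld:_(s32) = G_SEXTLOAD %ptr (load 1)
  //   %ext:_(s32) = G_SEXT_INREG %ld, 8
  // is redundant because the load already sign-extended from 8 bits, so the
  // G_SEXT_INREG can be replaced by a plain copy of %ld.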
603   if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
604     const auto &MMO = **LoadMI->memoperands_begin();
605     // If truncating more than the original extended value, abort.
606     if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
607       return false;
608     if (MMO.getSizeInBits() == SizeInBits)
609       return true;
610   }
611   return false;
612 }
613 
614 bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
615   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
616   Builder.setInstrAndDebugLoc(MI);
617   Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
618   MI.eraseFromParent();
619   return true;
620 }
621 
622 bool CombinerHelper::matchSextInRegOfLoad(
623     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
624   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
625 
626   // Only supports scalars for now.
627   if (MRI.getType(MI.getOperand(0).getReg()).isVector())
628     return false;
629 
630   Register SrcReg = MI.getOperand(1).getReg();
631   MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
632   if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
633     return false;
634 
635   // If the sign extend extends from a narrower width than the load's width,
636   // then we can narrow the load width when we combine to a G_SEXTLOAD.
637   auto &MMO = **LoadDef->memoperands_begin();
638   // Don't do this for non-simple loads.
639   if (MMO.isAtomic() || MMO.isVolatile())
640     return false;
641 
642   // Avoid widening the load at all.
643   unsigned NewSizeBits =
644       std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
645 
646   // Don't generate G_SEXTLOADs with a < 1 byte width.
647   if (NewSizeBits < 8)
648     return false;
649   // Don't bother creating a non-power-2 sextload, it will likely be broken up
650   // anyway for most targets.
651   if (!isPowerOf2_32(NewSizeBits))
652     return false;
653   MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
654   return true;
655 }
656 
657 bool CombinerHelper::applySextInRegOfLoad(
658     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
659   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
660   Register LoadReg;
661   unsigned ScalarSizeBits;
662   std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
663   auto *LoadDef = MRI.getVRegDef(LoadReg);
664   assert(LoadDef && "Expected a load reg");
665 
666   // If we have the following:
667   // %ld = G_LOAD %ptr, (load 2)
668   // %ext = G_SEXT_INREG %ld, 8
669   //    ==>
670   // %ld = G_SEXTLOAD %ptr (load 1)
671 
672   auto &MMO = **LoadDef->memoperands_begin();
673   Builder.setInstrAndDebugLoc(MI);
674   auto &MF = Builder.getMF();
675   auto PtrInfo = MMO.getPointerInfo();
676   auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
677   Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
678                          LoadDef->getOperand(1).getReg(), *NewMMO);
679   MI.eraseFromParent();
680   return true;
681 }
682 
683 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
684                                             Register &Base, Register &Offset) {
685   auto &MF = *MI.getParent()->getParent();
686   const auto &TLI = *MF.getSubtarget().getTargetLowering();
687 
688 #ifndef NDEBUG
689   unsigned Opcode = MI.getOpcode();
690   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
691          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
692 #endif
693 
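  // The shape we are looking for, as an illustrative sketch (names made up):
  //   %base:_(p0) = ...
  //   ... = G_LOAD %base(p0)            ; this MI
  //   %next:_(p0) = G_PTR_ADD %base, %off
  // i.e. the pointer is advanced after the memory access, which a target with
  // post-indexed addressing can fold into a single G_INDEXED_LOAD defining
  // both the loaded value and the updated pointer.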
694   Base = MI.getOperand(1).getReg();
695   MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
696   if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
697     return false;
698 
699   LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
  // FIXME: The following use traversal needs a bail out for pathological cases.
701   for (auto &Use : MRI.use_nodbg_instructions(Base)) {
702     if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
703       continue;
704 
705     Offset = Use.getOperand(2).getReg();
706     if (!ForceLegalIndexing &&
707         !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
708       LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
709                         << Use);
710       continue;
711     }
712 
713     // Make sure the offset calculation is before the potentially indexed op.
714     // FIXME: we really care about dependency here. The offset calculation might
715     // be movable.
716     MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
717     if (!OffsetDef || !dominates(*OffsetDef, MI)) {
718       LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
719                         << Use);
720       continue;
721     }
722 
723     // FIXME: check whether all uses of Base are load/store with foldable
724     // addressing modes. If so, using the normal addr-modes is better than
725     // forming an indexed one.
726 
727     bool MemOpDominatesAddrUses = true;
728     for (auto &PtrAddUse :
729          MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
730       if (!dominates(MI, PtrAddUse)) {
731         MemOpDominatesAddrUses = false;
732         break;
733       }
734     }
735 
736     if (!MemOpDominatesAddrUses) {
737       LLVM_DEBUG(
738           dbgs() << "    Ignoring candidate as memop does not dominate uses: "
739                  << Use);
740       continue;
741     }
742 
743     LLVM_DEBUG(dbgs() << "    Found match: " << Use);
744     Addr = Use.getOperand(0).getReg();
745     return true;
746   }
747 
748   return false;
749 }
750 
751 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
752                                            Register &Base, Register &Offset) {
753   auto &MF = *MI.getParent()->getParent();
754   const auto &TLI = *MF.getSubtarget().getTargetLowering();
755 
756 #ifndef NDEBUG
757   unsigned Opcode = MI.getOpcode();
758   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
759          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
760 #endif
761 
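  // The shape we are looking for, as an illustrative sketch (names made up):
  //   %addr:_(p0) = G_PTR_ADD %base, %off
  //   ... = G_LOAD %addr(p0)            ; this MI
  // i.e. the address is computed immediately before the access, which a
  // target with pre-indexed addressing can fold into a single indexed
  // operation that also produces the updated %addr.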
762   Addr = MI.getOperand(1).getReg();
763   MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
764   if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
765     return false;
766 
767   Base = AddrDef->getOperand(1).getReg();
768   Offset = AddrDef->getOperand(2).getReg();
769 
770   LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
771 
772   if (!ForceLegalIndexing &&
773       !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
774     LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
775     return false;
776   }
777 
778   MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
779   if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
780     LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
781     return false;
782   }
783 
784   if (MI.getOpcode() == TargetOpcode::G_STORE) {
785     // Would require a copy.
786     if (Base == MI.getOperand(0).getReg()) {
787       LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
788       return false;
789     }
790 
791     // We're expecting one use of Addr in MI, but it could also be the
792     // value stored, which isn't actually dominated by the instruction.
793     if (MI.getOperand(0).getReg() == Addr) {
794       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
795       return false;
796     }
797   }
798 
799   // FIXME: check whether all uses of the base pointer are constant PtrAdds.
800   // That might allow us to end base's liveness here by adjusting the constant.
801 
802   for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
803     if (!dominates(MI, UseMI)) {
804       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
805       return false;
806     }
807   }
808 
809   return true;
810 }
811 
812 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
813   IndexedLoadStoreMatchInfo MatchInfo;
814   if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
815     applyCombineIndexedLoadStore(MI, MatchInfo);
816     return true;
817   }
818   return false;
819 }
820 
821 bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
822   unsigned Opcode = MI.getOpcode();
823   if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
824       Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
825     return false;
826 
827   // For now, no targets actually support these opcodes so don't waste time
828   // running these unless we're forced to for testing.
829   if (!ForceLegalIndexing)
830     return false;
831 
832   MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
833                                           MatchInfo.Offset);
834   if (!MatchInfo.IsPre &&
835       !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
836                               MatchInfo.Offset))
837     return false;
838 
839   return true;
840 }
841 
842 void CombinerHelper::applyCombineIndexedLoadStore(
843     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
844   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
845   MachineIRBuilder MIRBuilder(MI);
846   unsigned Opcode = MI.getOpcode();
847   bool IsStore = Opcode == TargetOpcode::G_STORE;
848   unsigned NewOpcode;
849   switch (Opcode) {
850   case TargetOpcode::G_LOAD:
851     NewOpcode = TargetOpcode::G_INDEXED_LOAD;
852     break;
853   case TargetOpcode::G_SEXTLOAD:
854     NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
855     break;
856   case TargetOpcode::G_ZEXTLOAD:
857     NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
858     break;
859   case TargetOpcode::G_STORE:
860     NewOpcode = TargetOpcode::G_INDEXED_STORE;
861     break;
862   default:
863     llvm_unreachable("Unknown load/store opcode");
864   }
865 
866   auto MIB = MIRBuilder.buildInstr(NewOpcode);
867   if (IsStore) {
868     MIB.addDef(MatchInfo.Addr);
869     MIB.addUse(MI.getOperand(0).getReg());
870   } else {
871     MIB.addDef(MI.getOperand(0).getReg());
872     MIB.addDef(MatchInfo.Addr);
873   }
874 
875   MIB.addUse(MatchInfo.Base);
876   MIB.addUse(MatchInfo.Offset);
877   MIB.addImm(MatchInfo.IsPre);
878   MI.eraseFromParent();
879   AddrDef.eraseFromParent();
880 
  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
882 }
883 
884 bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
885   if (MI.getOpcode() != TargetOpcode::G_BR)
886     return false;
887 
888   // Try to match the following:
889   // bb1:
890   //   %c(s32) = G_ICMP pred, %a, %b
891   //   %c1(s1) = G_TRUNC %c(s32)
892   //   G_BRCOND %c1, %bb2
893   //   G_BR %bb3
894   // bb2:
895   // ...
896   // bb3:
897 
  // The above pattern does not have a fallthrough to the successor bb2, always
  // resulting in a branch no matter which path is taken. Here we try to find
  // and replace that pattern with a conditional branch to bb3, so that
  // execution can instead fall through to bb2.
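  //
  // A sketch of the rewritten block (illustrative):
  //   %c(s32) = G_ICMP <inverted pred>, %a, %b
  //   %c1(s1) = G_TRUNC %c(s32)
  //   G_BRCOND %c1, %bb3
  // with the unconditional G_BR removed, so control falls through to bb2.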
902 
903   MachineBasicBlock *MBB = MI.getParent();
904   MachineBasicBlock::iterator BrIt(MI);
905   if (BrIt == MBB->begin())
906     return false;
907   assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
908 
909   MachineInstr *BrCond = &*std::prev(BrIt);
910   if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
911     return false;
912 
913   // Check that the next block is the conditional branch target.
914   if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
915     return false;
916 
917   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
918   if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
919       !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
920     return false;
921   return true;
922 }
923 
924 bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
925   if (!matchElideBrByInvertingCond(MI))
926     return false;
927   applyElideBrByInvertingCond(MI);
928   return true;
929 }
930 
931 void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
932   MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
933   MachineBasicBlock::iterator BrIt(MI);
934   MachineInstr *BrCond = &*std::prev(BrIt);
935   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
936 
937   CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
938       (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
939 
940   // Invert the G_ICMP condition.
941   Observer.changingInstr(*CmpMI);
942   CmpMI->getOperand(1).setPredicate(InversePred);
943   Observer.changedInstr(*CmpMI);
944 
945   // Change the conditional branch target.
946   Observer.changingInstr(*BrCond);
947   BrCond->getOperand(1).setMBB(BrTarget);
948   Observer.changedInstr(*BrCond);
949   MI.eraseFromParent();
950 }
951 
952 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
953   // On Darwin, -Os means optimize for size without hurting performance, so
954   // only really optimize for size when -Oz (MinSize) is used.
955   if (MF.getTarget().getTargetTriple().isOSDarwin())
956     return MF.getFunction().hasMinSize();
957   return MF.getFunction().hasOptSize();
958 }
959 
960 // Returns a list of types to use for memory op lowering in MemOps. A partial
961 // port of findOptimalMemOpLowering in TargetLowering.
962 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
963                                           unsigned Limit, const MemOp &Op,
964                                           unsigned DstAS, unsigned SrcAS,
965                                           const AttributeList &FuncAttributes,
966                                           const TargetLowering &TLI) {
967   if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
968     return false;
969 
970   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
971 
972   if (Ty == LLT()) {
973     // Use the largest scalar type whose alignment constraints are satisfied.
974     // We only need to check DstAlign here as SrcAlign is always greater or
975     // equal to DstAlign (or zero).
976     Ty = LLT::scalar(64);
977     if (Op.isFixedDstAlign())
978       while (Op.getDstAlign() < Ty.getSizeInBytes() &&
979              !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
980         Ty = LLT::scalar(Ty.getSizeInBytes());
981     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
982     // FIXME: check for the largest legal type we can load/store to.
983   }
984 
985   unsigned NumMemOps = 0;
986   uint64_t Size = Op.size();
987   while (Size) {
988     unsigned TySize = Ty.getSizeInBytes();
989     while (TySize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
991       LLT NewTy = Ty;
992       // FIXME: check for mem op safety and legality of the types. Not all of
993       // SDAGisms map cleanly to GISel concepts.
994       if (NewTy.isVector())
995         NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
996       NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
997       unsigned NewTySize = NewTy.getSizeInBytes();
998       assert(NewTySize > 0 && "Could not find appropriate type");
999 
1000       // If the new LLT cannot cover all of the remaining bits, then consider
1001       // issuing a (or a pair of) unaligned and overlapping load / store.
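      //
      // Illustrative sketch (assuming the target reports the misaligned
      // access as fast): a 7-byte copy with a largest type of s32 can be
      // covered by two s32 accesses at offsets 0 and 3, the second of which
      // overlaps the first by one byte.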
1002       bool Fast;
      // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
1004       MVT VT = getMVTForLLT(Ty);
1005       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
1006           TLI.allowsMisalignedMemoryAccesses(
1007               VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
1008               MachineMemOperand::MONone, &Fast) &&
1009           Fast)
1010         TySize = Size;
1011       else {
1012         Ty = NewTy;
1013         TySize = NewTySize;
1014       }
1015     }
1016 
1017     if (++NumMemOps > Limit)
1018       return false;
1019 
1020     MemOps.push_back(Ty);
1021     Size -= TySize;
1022   }
1023 
1024   return true;
1025 }
1026 
1027 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
1028   if (Ty.isVector())
1029     return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
1030                                 Ty.getNumElements());
1031   return IntegerType::get(C, Ty.getSizeInBits());
1032 }
1033 
1034 // Get a vectorized representation of the memset value operand, GISel edition.
1035 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
1036   MachineRegisterInfo &MRI = *MIB.getMRI();
1037   unsigned NumBits = Ty.getScalarSizeInBits();
1038   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1039   if (!Ty.isVector() && ValVRegAndVal) {
1040     unsigned KnownVal = ValVRegAndVal->Value;
1041     APInt Scalar = APInt(8, KnownVal);
1042     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
1043     return MIB.buildConstant(Ty, SplatVal).getReg(0);
1044   }
1045 
1046   // Extend the byte value to the larger type, and then multiply by a magic
1047   // value 0x010101... in order to replicate it across every byte.
1048   // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
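  // For example (illustrative), replicating a byte value of 0xAB into an s32
  // amounts to 0x000000AB * 0x01010101 = 0xABABABAB; the multiply below
  // performs this for non-constant values.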
1049   if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
1050     return MIB.buildConstant(Ty, 0).getReg(0);
1051   }
1052 
1053   LLT ExtType = Ty.getScalarType();
1054   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
1055   if (NumBits > 8) {
1056     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
1057     auto MagicMI = MIB.buildConstant(ExtType, Magic);
1058     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
1059   }
1060 
1061   // For vector types create a G_BUILD_VECTOR.
1062   if (Ty.isVector())
1063     Val = MIB.buildSplatVector(Ty, Val).getReg(0);
1064 
1065   return Val;
1066 }
1067 
1068 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
1069                                     Register Val, unsigned KnownLen,
1070                                     Align Alignment, bool IsVolatile) {
1071   auto &MF = *MI.getParent()->getParent();
1072   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1073   auto &DL = MF.getDataLayout();
1074   LLVMContext &C = MF.getFunction().getContext();
1075 
1076   assert(KnownLen != 0 && "Have a zero length memset length!");
1077 
1078   bool DstAlignCanChange = false;
1079   MachineFrameInfo &MFI = MF.getFrameInfo();
1080   bool OptSize = shouldLowerMemFuncForSize(MF);
1081 
1082   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1083   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1084     DstAlignCanChange = true;
1085 
1086   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
1087   std::vector<LLT> MemOps;
1088 
1089   const auto &DstMMO = **MI.memoperands_begin();
1090   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1091 
1092   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1093   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1094 
1095   if (!findGISelOptimalMemOpLowering(MemOps, Limit,
1096                                      MemOp::Set(KnownLen, DstAlignCanChange,
1097                                                 Alignment,
1098                                                 /*IsZeroMemset=*/IsZeroVal,
1099                                                 /*IsVolatile=*/IsVolatile),
1100                                      DstPtrInfo.getAddrSpace(), ~0u,
1101                                      MF.getFunction().getAttributes(), TLI))
1102     return false;
1103 
1104   if (DstAlignCanChange) {
1105     // Get an estimate of the type from the LLT.
1106     Type *IRTy = getTypeForLLT(MemOps[0], C);
1107     Align NewAlign = DL.getABITypeAlign(IRTy);
1108     if (NewAlign > Alignment) {
1109       Alignment = NewAlign;
1110       unsigned FI = FIDef->getOperand(1).getIndex();
1111       // Give the stack frame object a larger alignment if needed.
1112       if (MFI.getObjectAlign(FI) < Alignment)
1113         MFI.setObjectAlignment(FI, Alignment);
1114     }
1115   }
1116 
1117   MachineIRBuilder MIB(MI);
1118   // Find the largest store and generate the bit pattern for it.
1119   LLT LargestTy = MemOps[0];
1120   for (unsigned i = 1; i < MemOps.size(); i++)
1121     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1122       LargestTy = MemOps[i];
1123 
1124   // The memset stored value is always defined as an s8, so in order to make it
1125   // work with larger store types we need to repeat the bit pattern across the
1126   // wider type.
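  //
  // Illustrative sketch: a 16-byte memset whose MemOps list is {s64, s64}
  // stores the s64-splatted value at offsets 0 and 8 from Dst.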
1127   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
1128 
1129   if (!MemSetValue)
1130     return false;
1131 
1132   // Generate the stores. For each store type in the list, we generate the
1133   // matching store of that type to the destination address.
1134   LLT PtrTy = MRI.getType(Dst);
1135   unsigned DstOff = 0;
1136   unsigned Size = KnownLen;
1137   for (unsigned I = 0; I < MemOps.size(); I++) {
1138     LLT Ty = MemOps[I];
1139     unsigned TySize = Ty.getSizeInBytes();
1140     if (TySize > Size) {
1141       // Issuing an unaligned load / store pair that overlaps with the previous
1142       // pair. Adjust the offset accordingly.
1143       assert(I == MemOps.size() - 1 && I != 0);
1144       DstOff -= TySize - Size;
1145     }
1146 
1147     // If this store is smaller than the largest store see whether we can get
1148     // the smaller value for free with a truncate.
1149     Register Value = MemSetValue;
1150     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1151       MVT VT = getMVTForLLT(Ty);
1152       MVT LargestVT = getMVTForLLT(LargestTy);
1153       if (!LargestTy.isVector() && !Ty.isVector() &&
1154           TLI.isTruncateFree(LargestVT, VT))
1155         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1156       else
1157         Value = getMemsetValue(Val, Ty, MIB);
1158       if (!Value)
1159         return false;
1160     }
1161 
1162     auto *StoreMMO =
1163         MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
1164 
1165     Register Ptr = Dst;
1166     if (DstOff != 0) {
1167       auto Offset =
1168           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1169       Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1170     }
1171 
1172     MIB.buildStore(Value, Ptr, *StoreMMO);
1173     DstOff += Ty.getSizeInBytes();
1174     Size -= TySize;
1175   }
1176 
1177   MI.eraseFromParent();
1178   return true;
1179 }
1180 
1181 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1182                                     Register Src, unsigned KnownLen,
1183                                     Align DstAlign, Align SrcAlign,
1184                                     bool IsVolatile) {
1185   auto &MF = *MI.getParent()->getParent();
1186   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1187   auto &DL = MF.getDataLayout();
1188   LLVMContext &C = MF.getFunction().getContext();
1189 
1190   assert(KnownLen != 0 && "Have a zero length memcpy length!");
1191 
1192   bool DstAlignCanChange = false;
1193   MachineFrameInfo &MFI = MF.getFrameInfo();
1194   bool OptSize = shouldLowerMemFuncForSize(MF);
1195   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1196 
1197   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1198   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1199     DstAlignCanChange = true;
1200 
1201   // FIXME: infer better src pointer alignment like SelectionDAG does here.
1202   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1203   // if the memcpy is in a tail call position.
1204 
1205   unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1206   std::vector<LLT> MemOps;
1207 
1208   const auto &DstMMO = **MI.memoperands_begin();
1209   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1210   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1211   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1212 
1213   if (!findGISelOptimalMemOpLowering(
1214           MemOps, Limit,
1215           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1216                       IsVolatile),
1217           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1218           MF.getFunction().getAttributes(), TLI))
1219     return false;
1220 
1221   if (DstAlignCanChange) {
1222     // Get an estimate of the type from the LLT.
1223     Type *IRTy = getTypeForLLT(MemOps[0], C);
1224     Align NewAlign = DL.getABITypeAlign(IRTy);
1225 
1226     // Don't promote to an alignment that would require dynamic stack
1227     // realignment.
1228     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1229     if (!TRI->needsStackRealignment(MF))
1230       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1231         NewAlign = NewAlign / 2;
1232 
1233     if (NewAlign > Alignment) {
1234       Alignment = NewAlign;
1235       unsigned FI = FIDef->getOperand(1).getIndex();
1236       // Give the stack frame object a larger alignment if needed.
1237       if (MFI.getObjectAlign(FI) < Alignment)
1238         MFI.setObjectAlignment(FI, Alignment);
1239     }
1240   }
1241 
1242   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1243 
1244   MachineIRBuilder MIB(MI);
  // Now we need to emit a load and store pair for each of the types we've
  // collected. I.e. for each type, generate a load from the source pointer of
  // that type width, and then generate a corresponding store of the loaded
  // value to the dest buffer. This can result in a sequence of loads and
  // stores of mixed types, depending on what the target specifies as good
  // types to use.
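  //
  // Illustrative sketch: an 8-byte copy lowered with MemOps = {s64} becomes a
  // single s64 G_LOAD from Src followed by an s64 G_STORE to Dst.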
1250   unsigned CurrOffset = 0;
1251   LLT PtrTy = MRI.getType(Src);
1252   unsigned Size = KnownLen;
1253   for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
1256     if (CopyTy.getSizeInBytes() > Size)
1257       CurrOffset -= CopyTy.getSizeInBytes() - Size;
1258 
1259     // Construct MMOs for the accesses.
1260     auto *LoadMMO =
1261         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1262     auto *StoreMMO =
1263         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1264 
1265     // Create the load.
1266     Register LoadPtr = Src;
1267     Register Offset;
1268     if (CurrOffset != 0) {
1269       Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1270                    .getReg(0);
1271       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1272     }
1273     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1274 
1275     // Create the store.
1276     Register StorePtr =
1277         CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1278     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1279     CurrOffset += CopyTy.getSizeInBytes();
1280     Size -= CopyTy.getSizeInBytes();
1281   }
1282 
1283   MI.eraseFromParent();
1284   return true;
1285 }
1286 
1287 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1288                                      Register Src, unsigned KnownLen,
1289                                      Align DstAlign, Align SrcAlign,
1290                                      bool IsVolatile) {
1291   auto &MF = *MI.getParent()->getParent();
1292   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1293   auto &DL = MF.getDataLayout();
1294   LLVMContext &C = MF.getFunction().getContext();
1295 
1296   assert(KnownLen != 0 && "Have a zero length memmove length!");
1297 
1298   bool DstAlignCanChange = false;
1299   MachineFrameInfo &MFI = MF.getFrameInfo();
1300   bool OptSize = shouldLowerMemFuncForSize(MF);
1301   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1302 
1303   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1304   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1305     DstAlignCanChange = true;
1306 
1307   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1308   std::vector<LLT> MemOps;
1309 
1310   const auto &DstMMO = **MI.memoperands_begin();
1311   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1312   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1313   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1314 
  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now do the
  // same thing here.
1318   if (!findGISelOptimalMemOpLowering(
1319           MemOps, Limit,
1320           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1321                       /*IsVolatile*/ true),
1322           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1323           MF.getFunction().getAttributes(), TLI))
1324     return false;
1325 
1326   if (DstAlignCanChange) {
1327     // Get an estimate of the type from the LLT.
1328     Type *IRTy = getTypeForLLT(MemOps[0], C);
1329     Align NewAlign = DL.getABITypeAlign(IRTy);
1330 
1331     // Don't promote to an alignment that would require dynamic stack
1332     // realignment.
1333     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1334     if (!TRI->needsStackRealignment(MF))
1335       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1336         NewAlign = NewAlign / 2;
1337 
1338     if (NewAlign > Alignment) {
1339       Alignment = NewAlign;
1340       unsigned FI = FIDef->getOperand(1).getIndex();
1341       // Give the stack frame object a larger alignment if needed.
1342       if (MFI.getObjectAlign(FI) < Alignment)
1343         MFI.setObjectAlignment(FI, Alignment);
1344     }
1345   }
1346 
1347   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1348 
1349   MachineIRBuilder MIB(MI);
  // Memmove requires that we perform all of the loads before issuing any of
  // the stores. Apart from that, this loop does essentially the same thing as
  // the memcpy lowering above.
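  // For example, with overlapping accesses disallowed, a 15-byte memmove
  // might be lowered as one s64, one s32, one s16 and one s8 load/store
  // pair, assuming the target accepts those chunk types (illustrative only;
  // the exact chunks come from findGISelOptimalMemOpLowering).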
1353   unsigned CurrOffset = 0;
1354   LLT PtrTy = MRI.getType(Src);
1355   SmallVector<Register, 16> LoadVals;
1356   for (auto CopyTy : MemOps) {
1357     // Construct MMO for the load.
1358     auto *LoadMMO =
1359         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1360 
1361     // Create the load.
1362     Register LoadPtr = Src;
1363     if (CurrOffset != 0) {
1364       auto Offset =
1365           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1366       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1367     }
1368     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1369     CurrOffset += CopyTy.getSizeInBytes();
1370   }
1371 
1372   CurrOffset = 0;
1373   for (unsigned I = 0; I < MemOps.size(); ++I) {
1374     LLT CopyTy = MemOps[I];
1375     // Now store the values loaded.
1376     auto *StoreMMO =
1377         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1378 
1379     Register StorePtr = Dst;
1380     if (CurrOffset != 0) {
1381       auto Offset =
1382           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1383       StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1384     }
1385     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1386     CurrOffset += CopyTy.getSizeInBytes();
1387   }
1388   MI.eraseFromParent();
1389   return true;
1390 }
1391 
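// Try to inline-expand a G_MEMCPY, G_MEMMOVE or G_MEMSET whose length is a
// known (and small enough) constant. Zero-length operations are simply
// erased; unknown or over-sized lengths are left alone so the legalizer can
// lower them to a libcall instead.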
1392 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1393   const unsigned Opc = MI.getOpcode();
1394   // This combine is fairly complex so it's not written with a separate
1395   // matcher function.
1396   assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
1397           Opc == TargetOpcode::G_MEMSET) && "Expected memcpy like instruction");
1398 
1399   auto MMOIt = MI.memoperands_begin();
1400   const MachineMemOperand *MemOp = *MMOIt;
1401   bool IsVolatile = MemOp->isVolatile();
1402   // Don't try to optimize volatile.
1403   if (IsVolatile)
1404     return false;
1405 
1406   Align DstAlign = MemOp->getBaseAlign();
1407   Align SrcAlign;
1408   Register Dst = MI.getOperand(0).getReg();
1409   Register Src = MI.getOperand(1).getReg();
1410   Register Len = MI.getOperand(2).getReg();
1411 
  if (Opc != TargetOpcode::G_MEMSET) {
    ++MMOIt;
    assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
    MemOp = *MMOIt;
    SrcAlign = MemOp->getBaseAlign();
  }
1417 
1418   // See if this is a constant length copy
1419   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1420   if (!LenVRegAndVal)
1421     return false; // Leave it to the legalizer to lower it to a libcall.
1422   unsigned KnownLen = LenVRegAndVal->Value;
1423 
1424   if (KnownLen == 0) {
1425     MI.eraseFromParent();
1426     return true;
1427   }
1428 
1429   if (MaxLen && KnownLen > MaxLen)
1430     return false;
1431 
  if (Opc == TargetOpcode::G_MEMCPY)
    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                          IsVolatile);
  if (Opc == TargetOpcode::G_MEMMOVE)
    return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
  if (Opc == TargetOpcode::G_MEMSET)
    return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1438   return false;
1439 }
1440 
1441 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1442                                            PtrAddChain &MatchInfo) {
1443   // We're trying to match the following pattern:
1444   //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1445   //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
1446   // -->
1447   //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1448 
1449   if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1450     return false;
1451 
1452   Register Add2 = MI.getOperand(1).getReg();
1453   Register Imm1 = MI.getOperand(2).getReg();
1454   auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1455   if (!MaybeImmVal)
1456     return false;
1457 
1458   MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
1459   if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1460     return false;
1461 
1462   Register Base = Add2Def->getOperand(1).getReg();
1463   Register Imm2 = Add2Def->getOperand(2).getReg();
1464   auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1465   if (!MaybeImm2Val)
1466     return false;
1467 
1468   // Pass the combined immediate to the apply function.
1469   MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
1470   MatchInfo.Base = Base;
1471   return true;
1472 }
1473 
1474 bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1475                                            PtrAddChain &MatchInfo) {
1476   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1477   MachineIRBuilder MIB(MI);
1478   LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1479   auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1480   Observer.changingInstr(MI);
1481   MI.getOperand(1).setReg(MatchInfo.Base);
1482   MI.getOperand(2).setReg(NewOffset.getReg(0));
1483   Observer.changedInstr(MI);
1484   return true;
1485 }
1486 
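// Fold a multiply by a power-of-two constant into a shift, e.g.:
//   %dst = G_MUL %x, G_CONSTANT 8
// -->
//   %dst = G_SHL %x, G_CONSTANT 3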
1487 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1488                                           unsigned &ShiftVal) {
1489   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1490   auto MaybeImmVal =
1491       getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1492   if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
1493     return false;
1494   ShiftVal = Log2_64(MaybeImmVal->Value);
1495   return true;
1496 }
1497 
1498 bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1499                                           unsigned &ShiftVal) {
1500   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1501   MachineIRBuilder MIB(MI);
1502   LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1503   auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1504   Observer.changingInstr(MI);
1505   MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1506   MI.getOperand(2).setReg(ShiftCst.getReg(0));
1507   Observer.changedInstr(MI);
1508   return true;
1509 }
1510 
1511 // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
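// For example, if the top four bits of %x are known to be zero:
//   %ext:_(s32) = G_ZEXT %x(s16)
//   %dst:_(s32) = G_SHL %ext, G_CONSTANT 4
// -->
//   %shl:_(s16) = G_SHL %x, G_CONSTANT 4
//   %dst:_(s32) = G_ZEXT %shl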
1512 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
1513                                              RegisterImmPair &MatchData) {
1514   assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
1515 
1516   Register LHS = MI.getOperand(1).getReg();
1517 
1518   Register ExtSrc;
1519   if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1520       !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1521       !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1522     return false;
1523 
1524   // TODO: Should handle vector splat.
1525   Register RHS = MI.getOperand(2).getReg();
1526   auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
1527   if (!MaybeShiftAmtVal)
1528     return false;
1529 
1530   if (LI) {
1531     LLT SrcTy = MRI.getType(ExtSrc);
1532 
    // We only really care about the legality of the shifted value. We can
    // pick any type for the constant shift amount, so ask the target what to
    // use. Otherwise we would have to guess and hope it is reported as legal.
1536     LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1537     if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1538       return false;
1539   }
1540 
1541   int64_t ShiftAmt = MaybeShiftAmtVal->Value;
1542   MatchData.Reg = ExtSrc;
1543   MatchData.Imm = ShiftAmt;
1544 
1545   unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
1546   return MinLeadingZeros >= ShiftAmt;
1547 }
1548 
1549 bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
1550                                              const RegisterImmPair &MatchData) {
1551   Register ExtSrcReg = MatchData.Reg;
1552   int64_t ShiftAmtVal = MatchData.Imm;
1553 
1554   LLT ExtSrcTy = MRI.getType(ExtSrcReg);
1555   Builder.setInstrAndDebugLoc(MI);
1556   auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
1557   auto NarrowShift =
1558       Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
1559   Builder.buildZExt(MI.getOperand(0), NarrowShift);
1560   MI.eraseFromParent();
1561   return true;
1562 }
1563 
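// Match a scalar shift whose constant amount is at least half the bit width;
// such a shift can be performed on a single half of the value once it has
// been split with G_UNMERGE_VALUES (see the patterns illustrated in
// applyCombineShiftToUnmerge below).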
1564 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
1565                                                 unsigned TargetShiftSize,
1566                                                 unsigned &ShiftVal) {
1567   assert((MI.getOpcode() == TargetOpcode::G_SHL ||
1568           MI.getOpcode() == TargetOpcode::G_LSHR ||
1569           MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
1570 
1571   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO: Handle vector types.
1573     return false;
1574 
1575   // Don't narrow further than the requested size.
1576   unsigned Size = Ty.getSizeInBits();
1577   if (Size <= TargetShiftSize)
1578     return false;
1579 
1580   auto MaybeImmVal =
1581     getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1582   if (!MaybeImmVal)
1583     return false;
1584 
1585   ShiftVal = MaybeImmVal->Value;
1586   return ShiftVal >= Size / 2 && ShiftVal < Size;
1587 }
1588 
1589 bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
1590                                                 const unsigned &ShiftVal) {
1591   Register DstReg = MI.getOperand(0).getReg();
1592   Register SrcReg = MI.getOperand(1).getReg();
1593   LLT Ty = MRI.getType(SrcReg);
1594   unsigned Size = Ty.getSizeInBits();
1595   unsigned HalfSize = Size / 2;
1596   assert(ShiftVal >= HalfSize);
1597 
1598   LLT HalfTy = LLT::scalar(HalfSize);
1599 
1600   Builder.setInstr(MI);
1601   auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
1602   unsigned NarrowShiftAmt = ShiftVal - HalfSize;
1603 
1604   if (MI.getOpcode() == TargetOpcode::G_LSHR) {
1605     Register Narrowed = Unmerge.getReg(1);
1606 
1607     //  dst = G_LSHR s64:x, C for C >= 32
1608     // =>
1609     //   lo, hi = G_UNMERGE_VALUES x
1610     //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
1611 
1612     if (NarrowShiftAmt != 0) {
1613       Narrowed = Builder.buildLShr(HalfTy, Narrowed,
1614         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
1615     }
1616 
1617     auto Zero = Builder.buildConstant(HalfTy, 0);
1618     Builder.buildMerge(DstReg, { Narrowed, Zero });
1619   } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
1620     Register Narrowed = Unmerge.getReg(0);
1621     //  dst = G_SHL s64:x, C for C >= 32
1622     // =>
1623     //   lo, hi = G_UNMERGE_VALUES x
1624     //   dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
1625     if (NarrowShiftAmt != 0) {
1626       Narrowed = Builder.buildShl(HalfTy, Narrowed,
1627         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
1628     }
1629 
1630     auto Zero = Builder.buildConstant(HalfTy, 0);
1631     Builder.buildMerge(DstReg, { Zero, Narrowed });
1632   } else {
1633     assert(MI.getOpcode() == TargetOpcode::G_ASHR);
1634     auto Hi = Builder.buildAShr(
1635       HalfTy, Unmerge.getReg(1),
1636       Builder.buildConstant(HalfTy, HalfSize - 1));
1637 
1638     if (ShiftVal == HalfSize) {
1639       // (G_ASHR i64:x, 32) ->
1640       //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
1641       Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
1642     } else if (ShiftVal == Size - 1) {
1643       // Don't need a second shift.
1644       // (G_ASHR i64:x, 63) ->
1645       //   %narrowed = (G_ASHR hi_32(x), 31)
1646       //   G_MERGE_VALUES %narrowed, %narrowed
1647       Builder.buildMerge(DstReg, { Hi, Hi });
1648     } else {
1649       auto Lo = Builder.buildAShr(
1650         HalfTy, Unmerge.getReg(1),
1651         Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
1652 
      // (G_ASHR i64:x, C) -> (for C >= 32)
1654       //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
1655       Builder.buildMerge(DstReg, { Lo, Hi });
1656     }
1657   }
1658 
1659   MI.eraseFromParent();
1660   return true;
1661 }
1662 
1663 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
1664                                               unsigned TargetShiftAmount) {
1665   unsigned ShiftAmt;
1666   if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
1667     applyCombineShiftToUnmerge(MI, ShiftAmt);
1668     return true;
1669   }
1670 
1671   return false;
1672 }
1673 
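// Fold an integer-to-pointer cast of a pointer-to-integer cast back to the
// original pointer when the types line up, e.g.:
//   %int:_(s64) = G_PTRTOINT %ptr(p0)
//   %dst:_(p0) = G_INTTOPTR %int(s64)
// -->
//   %dst:_(p0) = COPY %ptr(p0)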
1674 bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
1675   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
1676   Register DstReg = MI.getOperand(0).getReg();
1677   LLT DstTy = MRI.getType(DstReg);
1678   Register SrcReg = MI.getOperand(1).getReg();
1679   return mi_match(SrcReg, MRI,
1680                   m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
1681 }
1682 
1683 bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
1684   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
1685   Register DstReg = MI.getOperand(0).getReg();
1686   Builder.setInstr(MI);
1687   Builder.buildCopy(DstReg, Reg);
1688   MI.eraseFromParent();
1689   return true;
1690 }
1691 
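// Fold a pointer-to-integer cast of an integer-to-pointer cast back to the
// original integer, truncating or zero-extending if the widths differ, e.g.:
//   %ptr:_(p0) = G_INTTOPTR %x(s64)
//   %dst:_(s32) = G_PTRTOINT %ptr(p0)
// -->
//   %dst:_(s32) = G_TRUNC %x(s64)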
1692 bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
1693   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
1694   Register SrcReg = MI.getOperand(1).getReg();
1695   return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
1696 }
1697 
1698 bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
1699   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
1700   Register DstReg = MI.getOperand(0).getReg();
1701   Builder.setInstr(MI);
1702   Builder.buildZExtOrTrunc(DstReg, Reg);
1703   MI.eraseFromParent();
1704   return true;
1705 }
1706 
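// Fold an integer add of a pointer-to-integer cast into pointer arithmetic,
// e.g.:
//   %int:_(s64) = G_PTRTOINT %ptr(p0)
//   %dst:_(s64) = G_ADD %int, %offset
// -->
//   %add:_(p0) = G_PTR_ADD %ptr, %offset
//   %dst:_(s64) = G_PTRTOINT %add(p0)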
1707 bool CombinerHelper::matchCombineAddP2IToPtrAdd(
1708     MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
1709   assert(MI.getOpcode() == TargetOpcode::G_ADD);
1710   Register LHS = MI.getOperand(1).getReg();
1711   Register RHS = MI.getOperand(2).getReg();
1712   LLT IntTy = MRI.getType(LHS);
1713 
1714   // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
1715   // instruction.
1716   PtrReg.second = false;
1717   for (Register SrcReg : {LHS, RHS}) {
1718     if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
1719       // Don't handle cases where the integer is implicitly converted to the
1720       // pointer width.
1721       LLT PtrTy = MRI.getType(PtrReg.first);
1722       if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
1723         return true;
1724     }
1725 
1726     PtrReg.second = true;
1727   }
1728 
1729   return false;
1730 }
1731 
1732 bool CombinerHelper::applyCombineAddP2IToPtrAdd(
1733     MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
1734   Register Dst = MI.getOperand(0).getReg();
1735   Register LHS = MI.getOperand(1).getReg();
1736   Register RHS = MI.getOperand(2).getReg();
1737 
1738   const bool DoCommute = PtrReg.second;
1739   if (DoCommute)
1740     std::swap(LHS, RHS);
1741   LHS = PtrReg.first;
1742 
1743   LLT PtrTy = MRI.getType(LHS);
1744 
1745   Builder.setInstrAndDebugLoc(MI);
1746   auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
1747   Builder.buildPtrToInt(Dst, PtrAdd);
1748   MI.eraseFromParent();
1749   return true;
1750 }
1751 
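// Fold an any-extend of a truncate back to the original value when the
// destination type matches the pre-truncation type, e.g.:
//   %trunc:_(s32) = G_TRUNC %x(s64)
//   %dst:_(s64) = G_ANYEXT %trunc(s32)
// -->
//   all uses of %dst are replaced with %x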
1752 bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
1753   assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
1754   Register DstReg = MI.getOperand(0).getReg();
1755   Register SrcReg = MI.getOperand(1).getReg();
1756   LLT DstTy = MRI.getType(DstReg);
1757   return mi_match(SrcReg, MRI,
1758                   m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
1759 }
1760 
1761 bool CombinerHelper::applyCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
1762   assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
1763   Register DstReg = MI.getOperand(0).getReg();
1764   MI.eraseFromParent();
1765   replaceRegWith(MRI, DstReg, Reg);
1766   return true;
1767 }
1768 
1769 bool CombinerHelper::matchCombineExtOfExt(
1770     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1771   assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
1772           MI.getOpcode() == TargetOpcode::G_SEXT ||
1773           MI.getOpcode() == TargetOpcode::G_ZEXT) &&
1774          "Expected a G_[ASZ]EXT");
1775   Register SrcReg = MI.getOperand(1).getReg();
1776   MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
1777   // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
1778   unsigned Opc = MI.getOpcode();
1779   unsigned SrcOpc = SrcMI->getOpcode();
1780   if (Opc == SrcOpc ||
1781       (Opc == TargetOpcode::G_ANYEXT &&
1782        (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
1783       (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
1784     MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
1785     return true;
1786   }
1787   return false;
1788 }
1789 
1790 bool CombinerHelper::applyCombineExtOfExt(
1791     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
1792   assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
1793           MI.getOpcode() == TargetOpcode::G_SEXT ||
1794           MI.getOpcode() == TargetOpcode::G_ZEXT) &&
1795          "Expected a G_[ASZ]EXT");
1796 
1797   Register Reg = std::get<0>(MatchInfo);
1798   unsigned SrcExtOp = std::get<1>(MatchInfo);
1799 
1800   // Combine exts with the same opcode.
1801   if (MI.getOpcode() == SrcExtOp) {
1802     Observer.changingInstr(MI);
1803     MI.getOperand(1).setReg(Reg);
1804     Observer.changedInstr(MI);
1805     return true;
1806   }
1807 
1808   // Combine:
1809   // - anyext([sz]ext x) to [sz]ext x
1810   // - sext(zext x) to zext x
1811   if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
1812       (MI.getOpcode() == TargetOpcode::G_SEXT &&
1813        SrcExtOp == TargetOpcode::G_ZEXT)) {
1814     Register DstReg = MI.getOperand(0).getReg();
1815     Builder.setInstrAndDebugLoc(MI);
1816     Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
1817     MI.eraseFromParent();
1818     return true;
1819   }
1820 
1821   return false;
1822 }
1823 
1824 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
1825   return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1826     return MO.isReg() &&
1827            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
1828   });
1829 }
1830 
1831 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
1832   return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1833     return !MO.isReg() ||
1834            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
1835   });
1836 }
1837 
1838 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
1839   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
1840   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
1841   return all_of(Mask, [](int Elt) { return Elt < 0; });
1842 }
1843 
1844 bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
1845   assert(MI.getOpcode() == TargetOpcode::G_STORE);
1846   return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
1847                       MRI);
1848 }
1849 
1850 bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
1851   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
1852   return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
1853                       MRI);
1854 }
1855 
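// Match a G_SELECT whose condition is a known constant. OpIdx is set to the
// operand to keep: 2 (the true value) for a non-zero condition, 3 (the false
// value) for zero.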
1856 bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
1857   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
1858   if (auto MaybeCstCmp =
1859           getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
1860     OpIdx = MaybeCstCmp->Value ? 2 : 3;
1861     return true;
1862   }
1863   return false;
1864 }
1865 
1866 bool CombinerHelper::eraseInst(MachineInstr &MI) {
1867   MI.eraseFromParent();
1868   return true;
1869 }
1870 
1871 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
1872                                     const MachineOperand &MOP2) {
1873   if (!MOP1.isReg() || !MOP2.isReg())
1874     return false;
1875   MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
1876   if (!I1)
1877     return false;
1878   MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
1879   if (!I2)
1880     return false;
1881 
1882   // Handle a case like this:
1883   //
1884   // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
1885   //
  // Even though %0 and %1 are produced by the same instruction, they are not
  // the same values.
1888   if (I1 == I2)
1889     return MOP1.getReg() == MOP2.getReg();
1890 
1891   // If we have an instruction which loads or stores, we can't guarantee that
1892   // it is identical.
1893   //
1894   // For example, we may have
1895   //
1896   // %x1 = G_LOAD %addr (load N from @somewhere)
1897   // ...
1898   // call @foo
1899   // ...
1900   // %x2 = G_LOAD %addr (load N from @somewhere)
1901   // ...
1902   // %or = G_OR %x1, %x2
1903   //
  // It's possible that @foo will modify whatever lives at the address we're
  // loading from. To be safe, let's just assume that all loads and stores
  // are different (unless we have something which is guaranteed not to
  // change).
1908   if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
1909     return false;
1910 
1911   // Check for physical registers on the instructions first to avoid cases
1912   // like this:
1913   //
1914   // %a = COPY $physreg
1915   // ...
1916   // SOMETHING implicit-def $physreg
1917   // ...
1918   // %b = COPY $physreg
1919   //
1920   // These copies are not equivalent.
1921   if (any_of(I1->uses(), [](const MachineOperand &MO) {
1922         return MO.isReg() && MO.getReg().isPhysical();
1923       })) {
1924     // Check if we have a case like this:
1925     //
1926     // %a = COPY $physreg
1927     // %b = COPY %a
1928     //
1929     // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
1930     // From that, we know that they must have the same value, since they must
1931     // have come from the same COPY.
1932     return I1->isIdenticalTo(*I2);
1933   }
1934 
1935   // We don't have any physical registers, so we don't necessarily need the
1936   // same vreg defs.
1937   //
1938   // On the off-chance that there's some target instruction feeding into the
1939   // instruction, let's use produceSameValue instead of isIdenticalTo.
1940   return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
1941 }
1942 
1943 bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
1944   if (!MOP.isReg())
1945     return false;
1946   // MIPatternMatch doesn't let us look through G_ZEXT etc.
1947   auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
1948   return ValAndVReg && ValAndVReg->Value == C;
1949 }
1950 
1951 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
1952                                                      unsigned OpIdx) {
1953   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
1954   Register OldReg = MI.getOperand(0).getReg();
1955   Register Replacement = MI.getOperand(OpIdx).getReg();
1956   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
1957   MI.eraseFromParent();
1958   replaceRegWith(MRI, OldReg, Replacement);
1959   return true;
1960 }
1961 
1962 bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
1963                                                  Register Replacement) {
1964   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
1965   Register OldReg = MI.getOperand(0).getReg();
1966   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
1967   MI.eraseFromParent();
1968   replaceRegWith(MRI, OldReg, Replacement);
1969   return true;
1970 }
1971 
1972 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
1973   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
1974   // Match (cond ? x : x)
1975   return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
1976          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
1977                        MRI);
1978 }
1979 
1980 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
1981   return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
1982          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
1983                        MRI);
1984 }
1985 
1986 bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
1987   return matchConstantOp(MI.getOperand(OpIdx), 0) &&
1988          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
1989                        MRI);
1990 }
1991 
1992 bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
1993   MachineOperand &MO = MI.getOperand(OpIdx);
1994   return MO.isReg() &&
1995          getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
1996 }
1997 
1998 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
1999   assert(MI.getNumDefs() == 1 && "Expected only one def?");
2000   Builder.setInstr(MI);
2001   Builder.buildFConstant(MI.getOperand(0), C);
2002   MI.eraseFromParent();
2003   return true;
2004 }
2005 
2006 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2007   assert(MI.getNumDefs() == 1 && "Expected only one def?");
2008   Builder.setInstr(MI);
2009   Builder.buildConstant(MI.getOperand(0), C);
2010   MI.eraseFromParent();
2011   return true;
2012 }
2013 
2014 bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
2015   assert(MI.getNumDefs() == 1 && "Expected only one def?");
2016   Builder.setInstr(MI);
2017   Builder.buildUndef(MI.getOperand(0));
2018   MI.eraseFromParent();
2019   return true;
2020 }
2021 
2022 bool CombinerHelper::matchSimplifyAddToSub(
2023     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2024   Register LHS = MI.getOperand(1).getReg();
2025   Register RHS = MI.getOperand(2).getReg();
2026   Register &NewLHS = std::get<0>(MatchInfo);
2027   Register &NewRHS = std::get<1>(MatchInfo);
2028 
2029   // Helper lambda to check for opportunities for
2030   // ((0-A) + B) -> B - A
2031   // (A + (0-B)) -> A - B
2032   auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2033     int64_t Cst;
2034     if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
2035         Cst != 0)
2036       return false;
2037     NewLHS = MaybeNewLHS;
2038     return true;
2039   };
2040 
2041   return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2042 }
2043 
2044 bool CombinerHelper::applySimplifyAddToSub(
2045     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2046   Builder.setInstr(MI);
2047   Register SubLHS, SubRHS;
2048   std::tie(SubLHS, SubRHS) = MatchInfo;
2049   Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
2050   MI.eraseFromParent();
2051   return true;
2052 }
2053 
2054 bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
2055     MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
2056   // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
2057   //
  // Creates the new hand + logic instructions (but does not insert them).
2059   //
2060   // On success, MatchInfo is populated with the new instructions. These are
2061   // inserted in applyHoistLogicOpWithSameOpcodeHands.
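  // For example:
  //   %x:_(s32) = G_SEXT %a(s16)
  //   %y:_(s32) = G_SEXT %b(s16)
  //   %dst:_(s32) = G_AND %x, %y
  // -->
  //   %and:_(s16) = G_AND %a, %b
  //   %dst:_(s32) = G_SEXT %and(s16)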
2062   unsigned LogicOpcode = MI.getOpcode();
2063   assert(LogicOpcode == TargetOpcode::G_AND ||
2064          LogicOpcode == TargetOpcode::G_OR ||
2065          LogicOpcode == TargetOpcode::G_XOR);
2066   MachineIRBuilder MIB(MI);
2067   Register Dst = MI.getOperand(0).getReg();
2068   Register LHSReg = MI.getOperand(1).getReg();
2069   Register RHSReg = MI.getOperand(2).getReg();
2070 
  // Only fold if both hands have a single use; otherwise the original hand
  // instructions would stick around and we would just add instructions.
2072   if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
2073     return false;
2074 
2075   // Make sure we have (hand x, ...), (hand y, ...)
2076   MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
2077   MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
2078   if (!LeftHandInst || !RightHandInst)
2079     return false;
2080   unsigned HandOpcode = LeftHandInst->getOpcode();
2081   if (HandOpcode != RightHandInst->getOpcode())
2082     return false;
2083   if (!LeftHandInst->getOperand(1).isReg() ||
2084       !RightHandInst->getOperand(1).isReg())
2085     return false;
2086 
2087   // Make sure the types match up, and if we're doing this post-legalization,
2088   // we end up with legal types.
2089   Register X = LeftHandInst->getOperand(1).getReg();
2090   Register Y = RightHandInst->getOperand(1).getReg();
2091   LLT XTy = MRI.getType(X);
2092   LLT YTy = MRI.getType(Y);
2093   if (XTy != YTy)
2094     return false;
2095   if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
2096     return false;
2097 
2098   // Optional extra source register.
2099   Register ExtraHandOpSrcReg;
2100   switch (HandOpcode) {
2101   default:
2102     return false;
2103   case TargetOpcode::G_ANYEXT:
2104   case TargetOpcode::G_SEXT:
2105   case TargetOpcode::G_ZEXT: {
2106     // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
2107     break;
2108   }
2109   case TargetOpcode::G_AND:
2110   case TargetOpcode::G_ASHR:
2111   case TargetOpcode::G_LSHR:
2112   case TargetOpcode::G_SHL: {
2113     // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
2114     MachineOperand &ZOp = LeftHandInst->getOperand(2);
2115     if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
2116       return false;
2117     ExtraHandOpSrcReg = ZOp.getReg();
2118     break;
2119   }
2120   }
2121 
2122   // Record the steps to build the new instructions.
2123   //
2124   // Steps to build (logic x, y)
2125   auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
2126   OperandBuildSteps LogicBuildSteps = {
2127       [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
2128       [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
2129       [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
2130   InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
2131 
2132   // Steps to build hand (logic x, y), ...z
2133   OperandBuildSteps HandBuildSteps = {
2134       [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
2135       [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
2136   if (ExtraHandOpSrcReg.isValid())
2137     HandBuildSteps.push_back(
2138         [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
2139   InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
2140 
2141   MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
2142   return true;
2143 }
2144 
2145 bool CombinerHelper::applyBuildInstructionSteps(
2146     MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
2147   assert(MatchInfo.InstrsToBuild.size() &&
2148          "Expected at least one instr to build?");
2149   Builder.setInstr(MI);
2150   for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
2151     assert(InstrToBuild.Opcode && "Expected a valid opcode?");
2152     assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
2153     MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
2154     for (auto &OperandFn : InstrToBuild.OperandFns)
2155       OperandFn(Instr);
2156   }
2157   MI.eraseFromParent();
2158   return true;
2159 }
2160 
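// Fold a left shift followed by an arithmetic right shift by the same
// constant into a G_SEXT_INREG, e.g. on s32:
//   %shl:_(s32) = G_SHL %x, G_CONSTANT 24
//   %dst:_(s32) = G_ASHR %shl, G_CONSTANT 24
// -->
//   %dst:_(s32) = G_SEXT_INREG %x, 8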
2161 bool CombinerHelper::matchAshrShlToSextInreg(
2162     MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
2163   assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2164   int64_t ShlCst, AshrCst;
2165   Register Src;
2166   // FIXME: detect splat constant vectors.
2167   if (!mi_match(MI.getOperand(0).getReg(), MRI,
2168                 m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
2169     return false;
2170   if (ShlCst != AshrCst)
2171     return false;
2172   if (!isLegalOrBeforeLegalizer(
2173           {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
2174     return false;
2175   MatchInfo = std::make_tuple(Src, ShlCst);
2176   return true;
2177 }
2178 bool CombinerHelper::applyAshShlToSextInreg(
2179     MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
2180   assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2181   Register Src;
2182   int64_t ShiftAmt;
2183   std::tie(Src, ShiftAmt) = MatchInfo;
2184   unsigned Size = MRI.getType(Src).getScalarSizeInBits();
2185   Builder.setInstrAndDebugLoc(MI);
2186   Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
2187   MI.eraseFromParent();
2188   return true;
2189 }
2190 
2191 bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI,
2192                                              Register &Replacement) {
2193   // Given
2194   //
2195   // %mask:_(sN) = G_CONSTANT iN 000...0111...1
2196   // %x:_(sN) = G_SOMETHING
2197   // %y:_(sN) = G_AND %x, %mask
2198   //
2199   // Eliminate the G_AND when it is known that x & mask == x.
2200   //
2201   // Patterns like this can appear as a result of legalization. E.g.
2202   //
2203   // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
2204   // %one:_(s32) = G_CONSTANT i32 1
2205   // %and:_(s32) = G_AND %cmp, %one
2206   //
2207   // In this case, G_ICMP only produces a single bit, so x & 1 == x.
2208   assert(MI.getOpcode() == TargetOpcode::G_AND);
2209   if (!KB)
2210     return false;
2211 
2212   // Replacement = %x, AndDst = %y. Check that we can replace AndDst with the
2213   // LHS of the G_AND.
2214   Replacement = MI.getOperand(1).getReg();
2215   Register AndDst = MI.getOperand(0).getReg();
2216   LLT DstTy = MRI.getType(AndDst);
2217 
2218   // FIXME: This should be removed once GISelKnownBits supports vectors.
2219   if (DstTy.isVector())
2220     return false;
2221   if (!canReplaceReg(AndDst, Replacement, MRI))
2222     return false;
2223 
2224   // Check that we have a constant on the RHS of the G_AND, which is of the form
2225   // 000...0111...1.
2226   int64_t Cst;
2227   if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Cst)))
2228     return false;
2229   APInt Mask(DstTy.getSizeInBits(), Cst);
2230   if (!Mask.isMask())
2231     return false;
2232 
2233   // Now, let's check that x & Mask == x. If this is true, then x & ~Mask == 0.
2234   return KB->maskedValueIsZero(Replacement, ~Mask);
2235 }
2236 
2237 bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
2238   // If the input is already sign extended, just drop the extension.
2239   Register Src = MI.getOperand(1).getReg();
2240   unsigned ExtBits = MI.getOperand(2).getImm();
2241   unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
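  // For example, a G_SEXT_INREG %x, 8 on s32 is a no-op if the top 25
  // (= 32 - 8 + 1) bits of %x are already known to be copies of bit 7.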
2242   return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
2243 }
2244 
2245 static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
2246                              int64_t Cst, bool IsVector, bool IsFP) {
2247   // For i1, Cst will always be -1 regardless of boolean contents.
2248   return (ScalarSizeBits == 1 && Cst == -1) ||
2249          isConstTrueVal(TLI, Cst, IsVector, IsFP);
2250 }
2251 
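// Match a G_XOR of a tree of comparisons (combined with G_AND/G_OR) with the
// "true" constant; the xor can then be removed by inverting the comparison
// predicates and swapping the ANDs and ORs, e.g.:
//   %cmp:_(s1) = G_ICMP intpred(eq), %a, %b
//   %true:_(s1) = G_CONSTANT i1 true
//   %dst:_(s1) = G_XOR %cmp, %true
// -->
//   %dst:_(s1) = G_ICMP intpred(ne), %a, %b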
2252 bool CombinerHelper::matchNotCmp(MachineInstr &MI,
2253                                  SmallVectorImpl<Register> &RegsToNegate) {
2254   assert(MI.getOpcode() == TargetOpcode::G_XOR);
2255   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
2256   const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
2257   Register XorSrc;
2258   Register CstReg;
2259   // We match xor(src, true) here.
2260   if (!mi_match(MI.getOperand(0).getReg(), MRI,
2261                 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
2262     return false;
2263 
2264   if (!MRI.hasOneNonDBGUse(XorSrc))
2265     return false;
2266 
  // Check that XorSrc is the root of a tree of comparisons combined with ANDs
  // and ORs. The suffix of RegsToNegate starting from index I is used as a
  // work list of tree nodes to visit.
2270   RegsToNegate.push_back(XorSrc);
2271   // Remember whether the comparisons are all integer or all floating point.
2272   bool IsInt = false;
2273   bool IsFP = false;
2274   for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
2275     Register Reg = RegsToNegate[I];
2276     if (!MRI.hasOneNonDBGUse(Reg))
2277       return false;
2278     MachineInstr *Def = MRI.getVRegDef(Reg);
2279     switch (Def->getOpcode()) {
2280     default:
2281       // Don't match if the tree contains anything other than ANDs, ORs and
2282       // comparisons.
2283       return false;
2284     case TargetOpcode::G_ICMP:
2285       if (IsFP)
2286         return false;
2287       IsInt = true;
2288       // When we apply the combine we will invert the predicate.
2289       break;
2290     case TargetOpcode::G_FCMP:
2291       if (IsInt)
2292         return false;
2293       IsFP = true;
2294       // When we apply the combine we will invert the predicate.
2295       break;
2296     case TargetOpcode::G_AND:
2297     case TargetOpcode::G_OR:
2298       // Implement De Morgan's laws:
2299       // ~(x & y) -> ~x | ~y
2300       // ~(x | y) -> ~x & ~y
2301       // When we apply the combine we will change the opcode and recursively
2302       // negate the operands.
2303       RegsToNegate.push_back(Def->getOperand(1).getReg());
2304       RegsToNegate.push_back(Def->getOperand(2).getReg());
2305       break;
2306     }
2307   }
2308 
2309   // Now we know whether the comparisons are integer or floating point, check
2310   // the constant in the xor.
2311   int64_t Cst;
2312   if (Ty.isVector()) {
2313     MachineInstr *CstDef = MRI.getVRegDef(CstReg);
2314     auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
2315     if (!MaybeCst)
2316       return false;
2317     if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
2318       return false;
2319   } else {
2320     if (!mi_match(CstReg, MRI, m_ICst(Cst)))
2321       return false;
2322     if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
2323       return false;
2324   }
2325 
2326   return true;
2327 }
2328 
2329 bool CombinerHelper::applyNotCmp(MachineInstr &MI,
2330                                  SmallVectorImpl<Register> &RegsToNegate) {
2331   for (Register Reg : RegsToNegate) {
2332     MachineInstr *Def = MRI.getVRegDef(Reg);
2333     Observer.changingInstr(*Def);
2334     // For each comparison, invert the opcode. For each AND and OR, change the
2335     // opcode.
2336     switch (Def->getOpcode()) {
2337     default:
2338       llvm_unreachable("Unexpected opcode");
2339     case TargetOpcode::G_ICMP:
2340     case TargetOpcode::G_FCMP: {
2341       MachineOperand &PredOp = Def->getOperand(1);
2342       CmpInst::Predicate NewP = CmpInst::getInversePredicate(
2343           (CmpInst::Predicate)PredOp.getPredicate());
2344       PredOp.setPredicate(NewP);
2345       break;
2346     }
2347     case TargetOpcode::G_AND:
2348       Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
2349       break;
2350     case TargetOpcode::G_OR:
2351       Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
2352       break;
2353     }
2354     Observer.changedInstr(*Def);
2355   }
2356 
2357   replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
2358   MI.eraseFromParent();
2359   return true;
2360 }
2361 
2362 bool CombinerHelper::tryCombine(MachineInstr &MI) {
2363   if (tryCombineCopy(MI))
2364     return true;
2365   if (tryCombineExtendingLoads(MI))
2366     return true;
2367   if (tryCombineIndexedLoadStore(MI))
2368     return true;
2369   return false;
2370 }
2371