//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/CodeGen/GlobalISel/Combiner.h"
10 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
12 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
13 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
14 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
15 #include "llvm/CodeGen/GlobalISel/Utils.h"
16 #include "llvm/CodeGen/MachineDominators.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineInstr.h"
19 #include "llvm/CodeGen/MachineMemOperand.h"
20 #include "llvm/CodeGen/MachineRegisterInfo.h"
21 #include "llvm/CodeGen/TargetInstrInfo.h"
22 #include "llvm/CodeGen/TargetLowering.h"
23 #include "llvm/Support/MathExtras.h"
24 #include "llvm/Target/TargetMachine.h"
25 
26 #define DEBUG_TYPE "gi-combiner"
27 
28 using namespace llvm;
29 using namespace MIPatternMatch;
30 
31 // Option to allow testing of the combiner while no targets know about indexed
32 // addressing.
33 static cl::opt<bool>
34     ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
35                        cl::desc("Force all indexed operations to be "
36                                 "legal for the GlobalISel combiner"));
37 
38 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
39                                MachineIRBuilder &B, GISelKnownBits *KB,
40                                MachineDominatorTree *MDT,
41                                const LegalizerInfo *LI)
42     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
43       KB(KB), MDT(MDT), LI(LI) {
44   (void)this->KB;
45 }
46 
47 bool CombinerHelper::isLegalOrBeforeLegalizer(
48     const LegalityQuery &Query) const {
49   return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
50 }
51 
52 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
53                                     Register ToReg) const {
54   Observer.changingAllUsesOfReg(MRI, FromReg);
55 
56   if (MRI.constrainRegAttrs(ToReg, FromReg))
57     MRI.replaceRegWith(FromReg, ToReg);
58   else
59     Builder.buildCopy(ToReg, FromReg);
60 
61   Observer.finishedChangingAllUsesOfReg();
62 }
63 
64 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
65                                       MachineOperand &FromRegOp,
66                                       Register ToReg) const {
67   assert(FromRegOp.getParent() && "Expected an operand in an MI");
68   Observer.changingInstr(*FromRegOp.getParent());
69 
70   FromRegOp.setReg(ToReg);
71 
72   Observer.changedInstr(*FromRegOp.getParent());
73 }
74 
75 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
76   if (matchCombineCopy(MI)) {
77     applyCombineCopy(MI);
78     return true;
79   }
80   return false;
81 }
82 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
83   if (MI.getOpcode() != TargetOpcode::COPY)
84     return false;
85   Register DstReg = MI.getOperand(0).getReg();
86   Register SrcReg = MI.getOperand(1).getReg();
87   return canReplaceReg(DstReg, SrcReg, MRI);
88 }
89 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
90   Register DstReg = MI.getOperand(0).getReg();
91   Register SrcReg = MI.getOperand(1).getReg();
92   MI.eraseFromParent();
93   replaceRegWith(MRI, DstReg, SrcReg);
94 }
95 
96 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
97   bool IsUndef = false;
98   SmallVector<Register, 4> Ops;
99   if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
100     applyCombineConcatVectors(MI, IsUndef, Ops);
101     return true;
102   }
103   return false;
104 }
105 
106 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
107                                                SmallVectorImpl<Register> &Ops) {
108   assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
109          "Invalid instruction");
110   IsUndef = true;
111   MachineInstr *Undef = nullptr;
112 
113   // Walk over all the operands of concat vectors and check if they are
114   // build_vector themselves or undef.
115   // Then collect their operands in Ops.
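  //
  // For example:
  //   %x:_(<2 x s32>) = G_BUILD_VECTOR %a(s32), %b(s32)
  //   %y:_(<2 x s32>) = G_IMPLICIT_DEF
  //   %z:_(<4 x s32>) = G_CONCAT_VECTORS %x(<2 x s32>), %y(<2 x s32>)
  // collects Ops = {%a, %b, %undef, %undef}, which the apply step turns into
  // a single flattened G_BUILD_VECTOR.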
116   for (const MachineOperand &MO : MI.uses()) {
117     Register Reg = MO.getReg();
118     MachineInstr *Def = MRI.getVRegDef(Reg);
119     assert(Def && "Operand not defined");
120     switch (Def->getOpcode()) {
121     case TargetOpcode::G_BUILD_VECTOR:
122       IsUndef = false;
123       // Remember the operands of the build_vector to fold
124       // them into the yet-to-build flattened concat vectors.
125       for (const MachineOperand &BuildVecMO : Def->uses())
126         Ops.push_back(BuildVecMO.getReg());
127       break;
128     case TargetOpcode::G_IMPLICIT_DEF: {
129       LLT OpType = MRI.getType(Reg);
130       // Keep one undef value for all the undef operands.
131       if (!Undef) {
132         Builder.setInsertPt(*MI.getParent(), MI);
133         Undef = Builder.buildUndef(OpType.getScalarType());
134       }
135       assert(MRI.getType(Undef->getOperand(0).getReg()) ==
136                  OpType.getScalarType() &&
137              "All undefs should have the same type");
138       // Break the undef vector in as many scalar elements as needed
139       // for the flattening.
140       for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
141            EltIdx != EltEnd; ++EltIdx)
142         Ops.push_back(Undef->getOperand(0).getReg());
143       break;
144     }
145     default:
146       return false;
147     }
148   }
149   return true;
150 }
151 void CombinerHelper::applyCombineConcatVectors(
152     MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
154   // Generate the flattened build_vector.
155   Register DstReg = MI.getOperand(0).getReg();
156   Builder.setInsertPt(*MI.getParent(), MI);
157   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
158 
  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef.  Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up.  For now, given we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // right thing.
165   if (IsUndef)
166     Builder.buildUndef(NewDstReg);
167   else
168     Builder.buildBuildVector(NewDstReg, Ops);
169   MI.eraseFromParent();
170   replaceRegWith(MRI, DstReg, NewDstReg);
171 }
172 
173 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
174   SmallVector<Register, 4> Ops;
175   if (matchCombineShuffleVector(MI, Ops)) {
176     applyCombineShuffleVector(MI, Ops);
177     return true;
178   }
179   return false;
180 }
181 
182 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
183                                                SmallVectorImpl<Register> &Ops) {
184   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
185          "Invalid instruction kind");
186   LLT DstType = MRI.getType(MI.getOperand(0).getReg());
187   Register Src1 = MI.getOperand(1).getReg();
188   LLT SrcType = MRI.getType(Src1);
  // As bizarre as it may look, shuffle vector can actually produce a
  // scalar! This is because at the IR level a <1 x ty> shuffle
  // vector is perfectly valid.
192   unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
193   unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
194 
  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to turn the
  // shuffle vector into a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  //       extract_vector_elt and so on. It is less clear that would
  //       be better though, so don't bother for now.
  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter; we will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size. But
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match,
  //       we could still emit an extract_vector_elt in that case.
210   if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
211     return false;
212 
213   // Check that the shuffle mask can be broken evenly between the
214   // different sources.
215   if (DstNumElts % SrcNumElts != 0)
216     return false;
217 
218   // Mask length is a multiple of the source vector length.
219   // Check if the shuffle is some kind of concatenation of the input
220   // vectors.
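  //
  // For example, with <2 x s32> sources, the mask (0, 1, 2, 3) takes every
  // element of the first source followed by every element of the second, and
  // is therefore equivalent to a G_CONCAT_VECTORS of the two sources.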
221   unsigned NumConcat = DstNumElts / SrcNumElts;
222   SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
223   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
224   for (unsigned i = 0; i != DstNumElts; ++i) {
225     int Idx = Mask[i];
226     // Undef value.
227     if (Idx < 0)
228       continue;
229     // Ensure the indices in each SrcType sized piece are sequential and that
230     // the same source is used for the whole piece.
231     if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
232         (ConcatSrcs[i / SrcNumElts] >= 0 &&
233          ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
234       return false;
235     // Remember which source this index came from.
236     ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
237   }
238 
239   // The shuffle is concatenating multiple vectors together.
240   // Collect the different operands for that.
241   Register UndefReg;
242   Register Src2 = MI.getOperand(2).getReg();
243   for (auto Src : ConcatSrcs) {
244     if (Src < 0) {
245       if (!UndefReg) {
246         Builder.setInsertPt(*MI.getParent(), MI);
247         UndefReg = Builder.buildUndef(SrcType).getReg(0);
248       }
249       Ops.push_back(UndefReg);
250     } else if (Src == 0)
251       Ops.push_back(Src1);
252     else
253       Ops.push_back(Src2);
254   }
255   return true;
256 }
257 
258 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
259                                                const ArrayRef<Register> Ops) {
260   Register DstReg = MI.getOperand(0).getReg();
261   Builder.setInsertPt(*MI.getParent(), MI);
262   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
263 
264   if (Ops.size() == 1)
265     Builder.buildCopy(NewDstReg, Ops[0]);
266   else
267     Builder.buildMerge(NewDstReg, Ops);
268 
269   MI.eraseFromParent();
270   replaceRegWith(MRI, DstReg, NewDstReg);
271 }
272 
273 namespace {
274 
/// Select a preference between two uses. CurrentUse is the current preference
/// while the *ForCandidate arguments describe the candidate under
/// consideration.
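/// Defined extensions (G_SEXT/G_ZEXT) are preferred over G_ANYEXT, G_SEXT is
/// preferred over G_ZEXT when both candidates have the same type, and larger
/// result types are preferred over smaller ones.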
277 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
278                                   const LLT TyForCandidate,
279                                   unsigned OpcodeForCandidate,
280                                   MachineInstr *MIForCandidate) {
281   if (!CurrentUse.Ty.isValid()) {
282     if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
283         CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
284       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
285     return CurrentUse;
286   }
287 
288   // We permit the extend to hoist through basic blocks but this is only
289   // sensible if the target has extending loads. If you end up lowering back
290   // into a load and extend during the legalizer then the end result is
291   // hoisting the extend up to the load.
292 
293   // Prefer defined extensions to undefined extensions as these are more
294   // likely to reduce the number of instructions.
295   if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
296       CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
297     return CurrentUse;
298   else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
299            OpcodeForCandidate != TargetOpcode::G_ANYEXT)
300     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
301 
  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive, so it pays off more to fold them into the load.
304   if (CurrentUse.Ty == TyForCandidate) {
305     if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
306         OpcodeForCandidate == TargetOpcode::G_ZEXT)
307       return CurrentUse;
308     else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
309              OpcodeForCandidate == TargetOpcode::G_SEXT)
310       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
311   }
312 
313   // This is potentially target specific. We've chosen the largest type
314   // because G_TRUNC is usually free. One potential catch with this is that
315   // some targets have a reduced number of larger registers than smaller
316   // registers and this choice potentially increases the live-range for the
317   // larger value.
318   if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
319     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
320   }
321   return CurrentUse;
322 }
323 
324 /// Find a suitable place to insert some instructions and insert them. This
325 /// function accounts for special cases like inserting before a PHI node.
326 /// The current strategy for inserting before PHI's is to duplicate the
327 /// instructions for each predecessor. However, while that's ok for G_TRUNC
328 /// on most targets since it generally requires no code, other targets/cases may
329 /// want to try harder to find a dominating block.
330 static void InsertInsnsWithoutSideEffectsBeforeUse(
331     MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
332     std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
333                        MachineOperand &UseMO)>
334         Inserter) {
335   MachineInstr &UseMI = *UseMO.getParent();
336 
337   MachineBasicBlock *InsertBB = UseMI.getParent();
338 
339   // If the use is a PHI then we want the predecessor block instead.
340   if (UseMI.isPHI()) {
341     MachineOperand *PredBB = std::next(&UseMO);
342     InsertBB = PredBB->getMBB();
343   }
344 
345   // If the block is the same block as the def then we want to insert just after
346   // the def instead of at the start of the block.
347   if (InsertBB == DefMI.getParent()) {
348     MachineBasicBlock::iterator InsertPt = &DefMI;
349     Inserter(InsertBB, std::next(InsertPt), UseMO);
350     return;
351   }
352 
353   // Otherwise we want the start of the BB
354   Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
355 }
356 } // end anonymous namespace
357 
358 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
359   PreferredTuple Preferred;
360   if (matchCombineExtendingLoads(MI, Preferred)) {
361     applyCombineExtendingLoads(MI, Preferred);
362     return true;
363   }
364   return false;
365 }
366 
367 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
368                                                 PreferredTuple &Preferred) {
369   // We match the loads and follow the uses to the extend instead of matching
370   // the extends and following the def to the load. This is because the load
371   // must remain in the same position for correctness (unless we also add code
372   // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load, which would be incorrect
  // for volatile accesses and generally bad for performance otherwise.
375 
376   if (MI.getOpcode() != TargetOpcode::G_LOAD &&
377       MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
378       MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
379     return false;
380 
381   auto &LoadValue = MI.getOperand(0);
382   assert(LoadValue.isReg() && "Result wasn't a register?");
383 
384   LLT LoadValueTy = MRI.getType(LoadValue.getReg());
385   if (!LoadValueTy.isScalar())
386     return false;
387 
  // Most architectures are going to legalize loads narrower than s8 into at
  // least a 1 byte load, and the MMOs can only describe memory accesses in
  // multiples of bytes.
390   // If we try to perform extload combining on those, we can end up with
391   // %a(s8) = extload %ptr (load 1 byte from %ptr)
392   // ... which is an illegal extload instruction.
393   if (LoadValueTy.getSizeInBits() < 8)
394     return false;
395 
396   // For non power-of-2 types, they will very likely be legalized into multiple
397   // loads. Don't bother trying to match them into extending loads.
398   if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
399     return false;
400 
401   // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
404   // relative type sizes. At the same time, pick an extend to use based on the
405   // extend involved in the chosen type.
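  //
  // For example, given
  //   %v:_(s8) = G_LOAD %ptr
  //   %s:_(s32) = G_SEXT %v(s8)
  // the preferred use is the G_SEXT, and the load is rewritten to
  //   %s:_(s32) = G_SEXTLOAD %ptr
  // with any remaining uses of %v fixed up via G_TRUNC.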
406   unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
407                                  ? TargetOpcode::G_ANYEXT
408                                  : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
409                                        ? TargetOpcode::G_SEXT
410                                        : TargetOpcode::G_ZEXT;
411   Preferred = {LLT(), PreferredOpcode, nullptr};
412   for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
413     if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
414         UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
415         (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
416       // Check for legality.
417       if (LI) {
418         LegalityQuery::MemDesc MMDesc;
419         const auto &MMO = **MI.memoperands_begin();
420         MMDesc.SizeInBits = MMO.getSizeInBits();
421         MMDesc.AlignInBits = MMO.getAlign().value() * 8;
422         MMDesc.Ordering = MMO.getOrdering();
423         LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
424         LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
425         if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
426             LegalizeActions::Legal)
427           continue;
428       }
429       Preferred = ChoosePreferredUse(Preferred,
430                                      MRI.getType(UseMI.getOperand(0).getReg()),
431                                      UseMI.getOpcode(), &UseMI);
432     }
433   }
434 
  // There were no extends.
  if (!Preferred.MI)
    return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
440   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
441 
442   LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
443   return true;
444 }
445 
446 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
447                                                 PreferredTuple &Preferred) {
448   // Rewrite the load to the chosen extending load.
449   Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
450 
451   // Inserter to insert a truncate back to the original type at a given point
452   // with some basic CSE to limit truncate duplication to one per BB.
453   DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
454   auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
455                            MachineBasicBlock::iterator InsertBefore,
456                            MachineOperand &UseMO) {
457     MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
458     if (PreviouslyEmitted) {
459       Observer.changingInstr(*UseMO.getParent());
460       UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
461       Observer.changedInstr(*UseMO.getParent());
462       return;
463     }
464 
465     Builder.setInsertPt(*InsertIntoBB, InsertBefore);
466     Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
467     MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
468     EmittedInsns[InsertIntoBB] = NewMI;
469     replaceRegOpWith(MRI, UseMO, NewDstReg);
470   };
471 
472   Observer.changingInstr(MI);
473   MI.setDesc(
474       Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
475                                ? TargetOpcode::G_SEXTLOAD
476                                : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
477                                      ? TargetOpcode::G_ZEXTLOAD
478                                      : TargetOpcode::G_LOAD));
479 
480   // Rewrite all the uses to fix up the types.
481   auto &LoadValue = MI.getOperand(0);
482   SmallVector<MachineOperand *, 4> Uses;
483   for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
484     Uses.push_back(&UseMO);
485 
486   for (auto *UseMO : Uses) {
487     MachineInstr *UseMI = UseMO->getParent();
488 
489     // If the extend is compatible with the preferred extend then we should fix
490     // up the type and extend so that it uses the preferred use.
491     if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
492         UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
493       Register UseDstReg = UseMI->getOperand(0).getReg();
494       MachineOperand &UseSrcMO = UseMI->getOperand(1);
495       const LLT UseDstTy = MRI.getType(UseDstReg);
496       if (UseDstReg != ChosenDstReg) {
497         if (Preferred.Ty == UseDstTy) {
498           // If the use has the same type as the preferred use, then merge
499           // the vregs and erase the extend. For example:
500           //    %1:_(s8) = G_LOAD ...
501           //    %2:_(s32) = G_SEXT %1(s8)
502           //    %3:_(s32) = G_ANYEXT %1(s8)
503           //    ... = ... %3(s32)
504           // rewrites to:
505           //    %2:_(s32) = G_SEXTLOAD ...
506           //    ... = ... %2(s32)
507           replaceRegWith(MRI, UseDstReg, ChosenDstReg);
508           Observer.erasingInstr(*UseMO->getParent());
509           UseMO->getParent()->eraseFromParent();
510         } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
511           // If the preferred size is smaller, then keep the extend but extend
512           // from the result of the extending load. For example:
513           //    %1:_(s8) = G_LOAD ...
514           //    %2:_(s32) = G_SEXT %1(s8)
515           //    %3:_(s64) = G_ANYEXT %1(s8)
516           //    ... = ... %3(s64)
          // rewrites to:
518           //    %2:_(s32) = G_SEXTLOAD ...
519           //    %3:_(s64) = G_ANYEXT %2:_(s32)
520           //    ... = ... %3(s64)
521           replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
522         } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          //    ... = ... %3(s32)
534           InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
535                                                  InsertTruncAt);
536         }
537         continue;
538       }
      // This use is the preferred extend we chose earlier. The load will be
      // rewritten below to define this value directly, so just erase the now
      // redundant extend.
542       Observer.erasingInstr(*UseMO->getParent());
543       UseMO->getParent()->eraseFromParent();
544       continue;
545     }
546 
547     // The use isn't an extend. Truncate back to the type we originally loaded.
548     // This is free on many targets.
549     InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
550   }
551 
552   MI.getOperand(0).setReg(ChosenDstReg);
553   Observer.changedInstr(MI);
554 }
555 
556 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
557                                    const MachineInstr &UseMI) {
558   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
559          "shouldn't consider debug uses");
560   assert(DefMI.getParent() == UseMI.getParent());
561   if (&DefMI == &UseMI)
562     return false;
563 
  // Loop through the basic block until we find one of the instructions.
  for (const MachineInstr &CurMI : *DefMI.getParent()) {
    if (&CurMI == &DefMI)
      return true;
    if (&CurMI == &UseMI)
      return false;
  }

  llvm_unreachable("Block must contain both DefMI and UseMI");
570 }
571 
572 bool CombinerHelper::dominates(const MachineInstr &DefMI,
573                                const MachineInstr &UseMI) {
574   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
575          "shouldn't consider debug uses");
576   if (MDT)
577     return MDT->dominates(&DefMI, &UseMI);
578   else if (DefMI.getParent() != UseMI.getParent())
579     return false;
580 
581   return isPredecessor(DefMI, UseMI);
582 }
583 
584 bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
585   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
586   Register SrcReg = MI.getOperand(1).getReg();
587   Register LoadUser = SrcReg;
588 
589   if (MRI.getType(SrcReg).isVector())
590     return false;
591 
592   Register TruncSrc;
593   if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
594     LoadUser = TruncSrc;
595 
596   uint64_t SizeInBits = MI.getOperand(2).getImm();
597   // If the source is a G_SEXTLOAD from the same bit width, then we don't
598   // need any extend at all, just a truncate.
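  //
  // For example:
  //   %ld:_(s32) = G_SEXTLOAD %ptr (load 1)
  //   %ext:_(s32) = G_SEXT_INREG %ld, 8
  // The load has already sign-extended from 8 bits, so %ext can be replaced
  // by a plain copy of %ld.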
599   if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
600     const auto &MMO = **LoadMI->memoperands_begin();
601     // If truncating more than the original extended value, abort.
602     if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
603       return false;
604     if (MMO.getSizeInBits() == SizeInBits)
605       return true;
606   }
607   return false;
608 }
609 
610 bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
611   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
612   Builder.setInstrAndDebugLoc(MI);
613   Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
614   MI.eraseFromParent();
615   return true;
616 }
617 
618 bool CombinerHelper::matchSextInRegOfLoad(
619     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
620   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
621 
622   // Only supports scalars for now.
623   if (MRI.getType(MI.getOperand(0).getReg()).isVector())
624     return false;
625 
626   Register SrcReg = MI.getOperand(1).getReg();
627   MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
628   if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
629     return false;
630 
631   // If the sign extend extends from a narrower width than the load's width,
632   // then we can narrow the load width when we combine to a G_SEXTLOAD.
633   auto &MMO = **LoadDef->memoperands_begin();
634   // Don't do this for non-simple loads.
635   if (MMO.isAtomic() || MMO.isVolatile())
636     return false;
637 
638   // Avoid widening the load at all.
639   unsigned NewSizeBits =
640       std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
641 
642   // Don't generate G_SEXTLOADs with a < 1 byte width.
643   if (NewSizeBits < 8)
644     return false;
645   // Don't bother creating a non-power-2 sextload, it will likely be broken up
646   // anyway for most targets.
647   if (!isPowerOf2_32(NewSizeBits))
648     return false;
649   MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
650   return true;
651 }
652 
653 bool CombinerHelper::applySextInRegOfLoad(
654     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
655   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
656   Register LoadReg;
657   unsigned ScalarSizeBits;
658   std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
659   auto *LoadDef = MRI.getVRegDef(LoadReg);
660   assert(LoadDef && "Expected a load reg");
661 
662   // If we have the following:
663   // %ld = G_LOAD %ptr, (load 2)
664   // %ext = G_SEXT_INREG %ld, 8
665   //    ==>
666   // %ld = G_SEXTLOAD %ptr (load 1)
667 
668   auto &MMO = **LoadDef->memoperands_begin();
669   Builder.setInstrAndDebugLoc(MI);
670   auto &MF = Builder.getMF();
671   auto PtrInfo = MMO.getPointerInfo();
672   auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
673   Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
674                          LoadDef->getOperand(1).getReg(), *NewMMO);
675   MI.eraseFromParent();
676   return true;
677 }
678 
679 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
680                                             Register &Base, Register &Offset) {
681   auto &MF = *MI.getParent()->getParent();
682   const auto &TLI = *MF.getSubtarget().getTargetLowering();
683 
684 #ifndef NDEBUG
685   unsigned Opcode = MI.getOpcode();
686   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
687          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
688 #endif
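
  // A post-indexed operation leaves the access address unchanged but folds a
  // later "G_PTR_ADD %base, %offset" into the memory operation, which then
  // also produces the incremented address as an extra result.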
689 
690   Base = MI.getOperand(1).getReg();
691   MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
692   if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
693     return false;
694 
695   LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
696 
697   for (auto &Use : MRI.use_nodbg_instructions(Base)) {
698     if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
699       continue;
700 
701     Offset = Use.getOperand(2).getReg();
702     if (!ForceLegalIndexing &&
703         !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
704       LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
705                         << Use);
706       continue;
707     }
708 
709     // Make sure the offset calculation is before the potentially indexed op.
710     // FIXME: we really care about dependency here. The offset calculation might
711     // be movable.
712     MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
713     if (!OffsetDef || !dominates(*OffsetDef, MI)) {
714       LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
715                         << Use);
716       continue;
717     }
718 
719     // FIXME: check whether all uses of Base are load/store with foldable
720     // addressing modes. If so, using the normal addr-modes is better than
721     // forming an indexed one.
722 
723     bool MemOpDominatesAddrUses = true;
724     for (auto &PtrAddUse :
725          MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
726       if (!dominates(MI, PtrAddUse)) {
727         MemOpDominatesAddrUses = false;
728         break;
729       }
730     }
731 
732     if (!MemOpDominatesAddrUses) {
733       LLVM_DEBUG(
734           dbgs() << "    Ignoring candidate as memop does not dominate uses: "
735                  << Use);
736       continue;
737     }
738 
739     LLVM_DEBUG(dbgs() << "    Found match: " << Use);
740     Addr = Use.getOperand(0).getReg();
741     return true;
742   }
743 
744   return false;
745 }
746 
747 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
748                                            Register &Base, Register &Offset) {
749   auto &MF = *MI.getParent()->getParent();
750   const auto &TLI = *MF.getSubtarget().getTargetLowering();
751 
752 #ifndef NDEBUG
753   unsigned Opcode = MI.getOpcode();
754   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
755          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
756 #endif
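
  // A pre-indexed operation requires the access address itself to be defined
  // by a G_PTR_ADD; the access then happens at base + offset, and the memory
  // operation also produces that address as an extra result for later uses.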
757 
758   Addr = MI.getOperand(1).getReg();
759   MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
760   if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
761     return false;
762 
763   Base = AddrDef->getOperand(1).getReg();
764   Offset = AddrDef->getOperand(2).getReg();
765 
766   LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
767 
768   if (!ForceLegalIndexing &&
769       !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
770     LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
771     return false;
772   }
773 
774   MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
775   if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
776     LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
777     return false;
778   }
779 
780   if (MI.getOpcode() == TargetOpcode::G_STORE) {
781     // Would require a copy.
782     if (Base == MI.getOperand(0).getReg()) {
783       LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
784       return false;
785     }
786 
787     // We're expecting one use of Addr in MI, but it could also be the
788     // value stored, which isn't actually dominated by the instruction.
789     if (MI.getOperand(0).getReg() == Addr) {
790       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
791       return false;
792     }
793   }
794 
795   // FIXME: check whether all uses of the base pointer are constant PtrAdds.
796   // That might allow us to end base's liveness here by adjusting the constant.
797 
798   for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
799     if (!dominates(MI, UseMI)) {
800       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
801       return false;
802     }
803   }
804 
805   return true;
806 }
807 
808 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
809   IndexedLoadStoreMatchInfo MatchInfo;
810   if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
811     applyCombineIndexedLoadStore(MI, MatchInfo);
812     return true;
813   }
814   return false;
815 }
816 
bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
818   unsigned Opcode = MI.getOpcode();
819   if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
820       Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
821     return false;
822 
823   MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
824                                           MatchInfo.Offset);
825   if (!MatchInfo.IsPre &&
826       !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
827                               MatchInfo.Offset))
828     return false;
829 
830   return true;
831 }
832 
833 void CombinerHelper::applyCombineIndexedLoadStore(
834     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
835   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
836   MachineIRBuilder MIRBuilder(MI);
837   unsigned Opcode = MI.getOpcode();
838   bool IsStore = Opcode == TargetOpcode::G_STORE;
839   unsigned NewOpcode;
840   switch (Opcode) {
841   case TargetOpcode::G_LOAD:
842     NewOpcode = TargetOpcode::G_INDEXED_LOAD;
843     break;
844   case TargetOpcode::G_SEXTLOAD:
845     NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
846     break;
847   case TargetOpcode::G_ZEXTLOAD:
848     NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
849     break;
850   case TargetOpcode::G_STORE:
851     NewOpcode = TargetOpcode::G_INDEXED_STORE;
852     break;
853   default:
854     llvm_unreachable("Unknown load/store opcode");
855   }
856 
857   auto MIB = MIRBuilder.buildInstr(NewOpcode);
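  // The indexed loads define both the loaded value and the written-back
  // address, whereas G_INDEXED_STORE defines only the written-back address and
  // takes the stored value as a use. All forms then take the base, the offset
  // and a pre/post-indexing flag.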
858   if (IsStore) {
859     MIB.addDef(MatchInfo.Addr);
860     MIB.addUse(MI.getOperand(0).getReg());
861   } else {
862     MIB.addDef(MI.getOperand(0).getReg());
863     MIB.addDef(MatchInfo.Addr);
864   }
865 
866   MIB.addUse(MatchInfo.Base);
867   MIB.addUse(MatchInfo.Offset);
868   MIB.addImm(MatchInfo.IsPre);
869   MI.eraseFromParent();
870   AddrDef.eraseFromParent();
871 
872   LLVM_DEBUG(dbgs() << "    Combinined to indexed operation");
873 }
874 
875 bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
876   if (MI.getOpcode() != TargetOpcode::G_BR)
877     return false;
878 
879   // Try to match the following:
880   // bb1:
881   //   %c(s32) = G_ICMP pred, %a, %b
882   //   %c1(s1) = G_TRUNC %c(s32)
883   //   G_BRCOND %c1, %bb2
884   //   G_BR %bb3
885   // bb2:
886   // ...
887   // bb3:
888 
889   // The above pattern does not have a fall through to the successor bb2, always
890   // resulting in a branch no matter which path is taken. Here we try to find
  // and replace that pattern with a conditional branch to bb3, falling
  // through to bb2 otherwise.
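  //
  // I.e. after the combine the code looks like:
  // bb1:
  //   %c(s32) = G_ICMP <inverted pred>, %a, %b
  //   %c1(s1) = G_TRUNC %c(s32)
  //   G_BRCOND %c1, %bb3
  // bb2:
  // ...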
893 
894   MachineBasicBlock *MBB = MI.getParent();
895   MachineBasicBlock::iterator BrIt(MI);
896   if (BrIt == MBB->begin())
897     return false;
898   assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
899 
900   MachineInstr *BrCond = &*std::prev(BrIt);
901   if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
902     return false;
903 
904   // Check that the next block is the conditional branch target.
905   if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
906     return false;
907 
908   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
909   if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
910       !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
911     return false;
912   return true;
913 }
914 
915 bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
916   if (!matchElideBrByInvertingCond(MI))
917     return false;
918   applyElideBrByInvertingCond(MI);
919   return true;
920 }
921 
922 void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
923   MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
924   MachineBasicBlock::iterator BrIt(MI);
925   MachineInstr *BrCond = &*std::prev(BrIt);
926   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
927 
928   CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
929       (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
930 
931   // Invert the G_ICMP condition.
932   Observer.changingInstr(*CmpMI);
933   CmpMI->getOperand(1).setPredicate(InversePred);
934   Observer.changedInstr(*CmpMI);
935 
936   // Change the conditional branch target.
937   Observer.changingInstr(*BrCond);
938   BrCond->getOperand(1).setMBB(BrTarget);
939   Observer.changedInstr(*BrCond);
940   MI.eraseFromParent();
941 }
942 
943 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
944   // On Darwin, -Os means optimize for size without hurting performance, so
945   // only really optimize for size when -Oz (MinSize) is used.
946   if (MF.getTarget().getTargetTriple().isOSDarwin())
947     return MF.getFunction().hasMinSize();
948   return MF.getFunction().hasOptSize();
949 }
950 
951 // Returns a list of types to use for memory op lowering in MemOps. A partial
952 // port of findOptimalMemOpLowering in TargetLowering.
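//
// For example, a 15-byte operation on a target whose optimal type is s64 is
// broken into MemOps = {s64, s64} (with the second access overlapping the
// first) when overlapping, fast misaligned accesses are permitted, and into
// {s64, s32, s16, s8} otherwise.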
953 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
954                                           unsigned Limit, const MemOp &Op,
955                                           unsigned DstAS, unsigned SrcAS,
956                                           const AttributeList &FuncAttributes,
957                                           const TargetLowering &TLI) {
958   if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
959     return false;
960 
961   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
962 
963   if (Ty == LLT()) {
964     // Use the largest scalar type whose alignment constraints are satisfied.
965     // We only need to check DstAlign here as SrcAlign is always greater or
966     // equal to DstAlign (or zero).
967     Ty = LLT::scalar(64);
968     if (Op.isFixedDstAlign())
969       while (Op.getDstAlign() < Ty.getSizeInBytes() &&
970              !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
971         Ty = LLT::scalar(Ty.getSizeInBytes());
972     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
973     // FIXME: check for the largest legal type we can load/store to.
974   }
975 
976   unsigned NumMemOps = 0;
977   uint64_t Size = Op.size();
978   while (Size) {
979     unsigned TySize = Ty.getSizeInBytes();
980     while (TySize > Size) {
981       // For now, only use non-vector load / store's for the left-over pieces.
982       LLT NewTy = Ty;
983       // FIXME: check for mem op safety and legality of the types. Not all of
984       // SDAGisms map cleanly to GISel concepts.
985       if (NewTy.isVector())
986         NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
987       NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
988       unsigned NewTySize = NewTy.getSizeInBytes();
989       assert(NewTySize > 0 && "Could not find appropriate type");
990 
991       // If the new LLT cannot cover all of the remaining bits, then consider
992       // issuing a (or a pair of) unaligned and overlapping load / store.
993       bool Fast;
994       // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
995       MVT VT = getMVTForLLT(Ty);
996       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
997           TLI.allowsMisalignedMemoryAccesses(
998               VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
999               MachineMemOperand::MONone, &Fast) &&
1000           Fast)
1001         TySize = Size;
1002       else {
1003         Ty = NewTy;
1004         TySize = NewTySize;
1005       }
1006     }
1007 
1008     if (++NumMemOps > Limit)
1009       return false;
1010 
1011     MemOps.push_back(Ty);
1012     Size -= TySize;
1013   }
1014 
1015   return true;
1016 }
1017 
1018 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
1019   if (Ty.isVector())
1020     return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
1021                                 Ty.getNumElements());
1022   return IntegerType::get(C, Ty.getSizeInBits());
1023 }
1024 
1025 // Get a vectorized representation of the memset value operand, GISel edition.
1026 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
1027   MachineRegisterInfo &MRI = *MIB.getMRI();
1028   unsigned NumBits = Ty.getScalarSizeInBits();
1029   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1030   if (!Ty.isVector() && ValVRegAndVal) {
1031     unsigned KnownVal = ValVRegAndVal->Value;
1032     APInt Scalar = APInt(8, KnownVal);
1033     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
1034     return MIB.buildConstant(Ty, SplatVal).getReg(0);
1035   }
1036 
1037   // Extend the byte value to the larger type, and then multiply by a magic
1038   // value 0x010101... in order to replicate it across every byte.
1039   // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
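  //
  // For example, splatting the byte %val to s32:
  //   %ext:_(s32) = G_ZEXT %val(s8)
  //   %magic:_(s32) = G_CONSTANT i32 16843009 ; 0x01010101
  //   %splat:_(s32) = G_MUL %ext, %magic      ; 0xAB becomes 0xABABABAB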
1040   if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
1041     return MIB.buildConstant(Ty, 0).getReg(0);
1042   }
1043 
1044   LLT ExtType = Ty.getScalarType();
1045   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
1046   if (NumBits > 8) {
1047     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
1048     auto MagicMI = MIB.buildConstant(ExtType, Magic);
1049     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
1050   }
1051 
1052   // For vector types create a G_BUILD_VECTOR.
1053   if (Ty.isVector())
1054     Val = MIB.buildSplatVector(Ty, Val).getReg(0);
1055 
1056   return Val;
1057 }
1058 
1059 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
1060                                     Register Val, unsigned KnownLen,
1061                                     Align Alignment, bool IsVolatile) {
1062   auto &MF = *MI.getParent()->getParent();
1063   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1064   auto &DL = MF.getDataLayout();
1065   LLVMContext &C = MF.getFunction().getContext();
1066 
1067   assert(KnownLen != 0 && "Have a zero length memset length!");
1068 
1069   bool DstAlignCanChange = false;
1070   MachineFrameInfo &MFI = MF.getFrameInfo();
1071   bool OptSize = shouldLowerMemFuncForSize(MF);
1072 
1073   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1074   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1075     DstAlignCanChange = true;
1076 
1077   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
1078   std::vector<LLT> MemOps;
1079 
1080   const auto &DstMMO = **MI.memoperands_begin();
1081   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1082 
1083   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1084   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1085 
1086   if (!findGISelOptimalMemOpLowering(MemOps, Limit,
1087                                      MemOp::Set(KnownLen, DstAlignCanChange,
1088                                                 Alignment,
1089                                                 /*IsZeroMemset=*/IsZeroVal,
1090                                                 /*IsVolatile=*/IsVolatile),
1091                                      DstPtrInfo.getAddrSpace(), ~0u,
1092                                      MF.getFunction().getAttributes(), TLI))
1093     return false;
1094 
1095   if (DstAlignCanChange) {
1096     // Get an estimate of the type from the LLT.
1097     Type *IRTy = getTypeForLLT(MemOps[0], C);
1098     Align NewAlign = DL.getABITypeAlign(IRTy);
1099     if (NewAlign > Alignment) {
1100       Alignment = NewAlign;
1101       unsigned FI = FIDef->getOperand(1).getIndex();
1102       // Give the stack frame object a larger alignment if needed.
1103       if (MFI.getObjectAlign(FI) < Alignment)
1104         MFI.setObjectAlignment(FI, Alignment);
1105     }
1106   }
1107 
1108   MachineIRBuilder MIB(MI);
1109   // Find the largest store and generate the bit pattern for it.
1110   LLT LargestTy = MemOps[0];
1111   for (unsigned i = 1; i < MemOps.size(); i++)
1112     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1113       LargestTy = MemOps[i];
1114 
1115   // The memset stored value is always defined as an s8, so in order to make it
1116   // work with larger store types we need to repeat the bit pattern across the
1117   // wider type.
1118   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
1119 
1120   if (!MemSetValue)
1121     return false;
1122 
1123   // Generate the stores. For each store type in the list, we generate the
1124   // matching store of that type to the destination address.
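  //
  // For example, an 11-byte memset with MemOps = {s64, s16, s8} emits a
  // G_STORE of the s64 splat at offset 0, an s16 splat at offset 8, and an s8
  // splat at offset 10.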
1125   LLT PtrTy = MRI.getType(Dst);
1126   unsigned DstOff = 0;
1127   unsigned Size = KnownLen;
1128   for (unsigned I = 0; I < MemOps.size(); I++) {
1129     LLT Ty = MemOps[I];
1130     unsigned TySize = Ty.getSizeInBytes();
1131     if (TySize > Size) {
1132       // Issuing an unaligned load / store pair that overlaps with the previous
1133       // pair. Adjust the offset accordingly.
1134       assert(I == MemOps.size() - 1 && I != 0);
1135       DstOff -= TySize - Size;
1136     }
1137 
    // If this store is smaller than the largest store, see whether we can get
    // the smaller value for free with a truncate.
1140     Register Value = MemSetValue;
1141     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1142       MVT VT = getMVTForLLT(Ty);
1143       MVT LargestVT = getMVTForLLT(LargestTy);
1144       if (!LargestTy.isVector() && !Ty.isVector() &&
1145           TLI.isTruncateFree(LargestVT, VT))
1146         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1147       else
1148         Value = getMemsetValue(Val, Ty, MIB);
1149       if (!Value)
1150         return false;
1151     }
1152 
1153     auto *StoreMMO =
1154         MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
1155 
1156     Register Ptr = Dst;
1157     if (DstOff != 0) {
1158       auto Offset =
1159           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1160       Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1161     }
1162 
1163     MIB.buildStore(Value, Ptr, *StoreMMO);
1164     DstOff += Ty.getSizeInBytes();
1165     Size -= TySize;
1166   }
1167 
1168   MI.eraseFromParent();
1169   return true;
1170 }
1171 
1172 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1173                                     Register Src, unsigned KnownLen,
1174                                     Align DstAlign, Align SrcAlign,
1175                                     bool IsVolatile) {
1176   auto &MF = *MI.getParent()->getParent();
1177   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1178   auto &DL = MF.getDataLayout();
1179   LLVMContext &C = MF.getFunction().getContext();
1180 
1181   assert(KnownLen != 0 && "Have a zero length memcpy length!");
1182 
1183   bool DstAlignCanChange = false;
1184   MachineFrameInfo &MFI = MF.getFrameInfo();
1185   bool OptSize = shouldLowerMemFuncForSize(MF);
1186   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1187 
1188   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1189   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1190     DstAlignCanChange = true;
1191 
1192   // FIXME: infer better src pointer alignment like SelectionDAG does here.
1193   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1194   // if the memcpy is in a tail call position.
1195 
1196   unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1197   std::vector<LLT> MemOps;
1198 
1199   const auto &DstMMO = **MI.memoperands_begin();
1200   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1201   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1202   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1203 
1204   if (!findGISelOptimalMemOpLowering(
1205           MemOps, Limit,
1206           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1207                       IsVolatile),
1208           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1209           MF.getFunction().getAttributes(), TLI))
1210     return false;
1211 
1212   if (DstAlignCanChange) {
1213     // Get an estimate of the type from the LLT.
1214     Type *IRTy = getTypeForLLT(MemOps[0], C);
1215     Align NewAlign = DL.getABITypeAlign(IRTy);
1216 
1217     // Don't promote to an alignment that would require dynamic stack
1218     // realignment.
1219     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1220     if (!TRI->needsStackRealignment(MF))
1221       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1222         NewAlign = NewAlign / 2;
1223 
1224     if (NewAlign > Alignment) {
1225       Alignment = NewAlign;
1226       unsigned FI = FIDef->getOperand(1).getIndex();
1227       // Give the stack frame object a larger alignment if needed.
1228       if (MFI.getObjectAlign(FI) < Alignment)
1229         MFI.setObjectAlignment(FI, Alignment);
1230     }
1231   }
1232 
1233   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1234 
1235   MachineIRBuilder MIB(MI);
1236   // Now we need to emit a pair of load and stores for each of the types we've
1237   // collected. I.e. for each type, generate a load from the source pointer of
1238   // that type width, and then generate a corresponding store to the dest buffer
  // of that value loaded. This can result in a sequence of loads and stores of
  // mixed types, depending on what the target specifies as good types to use.
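  //
  // For example, a 16-byte copy lowered with MemOps = {s64, s64} becomes two
  // G_LOAD/G_STORE pairs of s64, the second pair addressed at offset 8 via
  // G_PTR_ADD.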
1241   unsigned CurrOffset = 0;
1242   LLT PtrTy = MRI.getType(Src);
1243   unsigned Size = KnownLen;
1244   for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
1247     if (CopyTy.getSizeInBytes() > Size)
1248       CurrOffset -= CopyTy.getSizeInBytes() - Size;
1249 
1250     // Construct MMOs for the accesses.
1251     auto *LoadMMO =
1252         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1253     auto *StoreMMO =
1254         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1255 
1256     // Create the load.
1257     Register LoadPtr = Src;
1258     Register Offset;
1259     if (CurrOffset != 0) {
1260       Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1261                    .getReg(0);
1262       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1263     }
1264     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1265 
1266     // Create the store.
1267     Register StorePtr =
1268         CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1269     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1270     CurrOffset += CopyTy.getSizeInBytes();
1271     Size -= CopyTy.getSizeInBytes();
1272   }
1273 
1274   MI.eraseFromParent();
1275   return true;
1276 }
1277 
1278 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1279                                      Register Src, unsigned KnownLen,
1280                                      Align DstAlign, Align SrcAlign,
1281                                      bool IsVolatile) {
1282   auto &MF = *MI.getParent()->getParent();
1283   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1284   auto &DL = MF.getDataLayout();
1285   LLVMContext &C = MF.getFunction().getContext();
1286 
1287   assert(KnownLen != 0 && "Have a zero length memmove length!");
1288 
1289   bool DstAlignCanChange = false;
1290   MachineFrameInfo &MFI = MF.getFrameInfo();
1291   bool OptSize = shouldLowerMemFuncForSize(MF);
1292   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1293 
1294   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1295   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1296     DstAlignCanChange = true;
1297 
1298   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1299   std::vector<LLT> MemOps;
1300 
1301   const auto &DstMMO = **MI.memoperands_begin();
1302   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1303   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1304   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1305 
  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now do the
  // same thing here.
1309   if (!findGISelOptimalMemOpLowering(
1310           MemOps, Limit,
1311           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1312                       /*IsVolatile*/ true),
1313           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1314           MF.getFunction().getAttributes(), TLI))
1315     return false;
1316 
1317   if (DstAlignCanChange) {
1318     // Get an estimate of the type from the LLT.
1319     Type *IRTy = getTypeForLLT(MemOps[0], C);
1320     Align NewAlign = DL.getABITypeAlign(IRTy);
1321 
1322     // Don't promote to an alignment that would require dynamic stack
1323     // realignment.
1324     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1325     if (!TRI->needsStackRealignment(MF))
1326       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1327         NewAlign = NewAlign / 2;
1328 
1329     if (NewAlign > Alignment) {
1330       Alignment = NewAlign;
1331       unsigned FI = FIDef->getOperand(1).getIndex();
1332       // Give the stack frame object a larger alignment if needed.
1333       if (MFI.getObjectAlign(FI) < Alignment)
1334         MFI.setObjectAlignment(FI, Alignment);
1335     }
1336   }
1337 
1338   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1339 
1340   MachineIRBuilder MIB(MI);
  // Memmove requires that we perform the loads first before issuing the stores.
  // Otherwise, when the source and destination ranges overlap, a store could
  // clobber source bytes that have not been read yet. Apart from that, this
  // loop is pretty much doing the same thing as the memcpy codegen function.
1344   unsigned CurrOffset = 0;
1345   LLT PtrTy = MRI.getType(Src);
1346   SmallVector<Register, 16> LoadVals;
1347   for (auto CopyTy : MemOps) {
1348     // Construct MMO for the load.
1349     auto *LoadMMO =
1350         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1351 
1352     // Create the load.
1353     Register LoadPtr = Src;
1354     if (CurrOffset != 0) {
1355       auto Offset =
1356           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1357       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1358     }
1359     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1360     CurrOffset += CopyTy.getSizeInBytes();
1361   }
1362 
1363   CurrOffset = 0;
1364   for (unsigned I = 0; I < MemOps.size(); ++I) {
1365     LLT CopyTy = MemOps[I];
1366     // Now store the values loaded.
1367     auto *StoreMMO =
1368         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1369 
1370     Register StorePtr = Dst;
1371     if (CurrOffset != 0) {
1372       auto Offset =
1373           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1374       StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1375     }
1376     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1377     CurrOffset += CopyTy.getSizeInBytes();
1378   }
1379   MI.eraseFromParent();
1380   return true;
1381 }
1382 
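// Try to inline a memcpy / memmove / memset intrinsic with a known constant
// length as a sequence of loads and stores. Returns false (leaving the
// instruction for the legalizer) for volatile accesses, unknown lengths, and
// lengths above MaxLen.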
1383 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1384   // This combine is fairly complex so it's not written with a separate
1385   // matcher function.
1386   assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1387   Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
1388   assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
1389           ID == Intrinsic::memset) &&
1390          "Expected a memcpy like intrinsic");
1391 
1392   auto MMOIt = MI.memoperands_begin();
1393   const MachineMemOperand *MemOp = *MMOIt;
1394   bool IsVolatile = MemOp->isVolatile();
1395   // Don't try to optimize volatile.
1396   if (IsVolatile)
1397     return false;
1398 
1399   Align DstAlign = MemOp->getBaseAlign();
1400   Align SrcAlign;
1401   Register Dst = MI.getOperand(1).getReg();
1402   Register Src = MI.getOperand(2).getReg();
1403   Register Len = MI.getOperand(3).getReg();
1404 
1405   if (ID != Intrinsic::memset) {
1406     assert(MMOIt + 1 != MI.memoperands_end() && "Expected a second MMO on MI");
1407     MemOp = *(++MMOIt);
1408     SrcAlign = MemOp->getBaseAlign();
1409   }
1410 
1411   // See if this is a constant length copy.
1412   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1413   if (!LenVRegAndVal)
1414     return false; // Leave it to the legalizer to lower it to a libcall.
1415   unsigned KnownLen = LenVRegAndVal->Value;
1416 
1417   if (KnownLen == 0) {
1418     MI.eraseFromParent();
1419     return true;
1420   }
1421 
1422   if (MaxLen && KnownLen > MaxLen)
1423     return false;
1424 
1425   if (ID == Intrinsic::memcpy)
1426     return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1427   if (ID == Intrinsic::memmove)
1428     return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1429   if (ID == Intrinsic::memset)
1430     return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1431   return false;
1432 }
1433 
1434 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1435                                            PtrAddChain &MatchInfo) {
1436   // We're trying to match the following pattern:
1437   //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1438   //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
1439   // -->
1440   //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1441 
1442   if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1443     return false;
1444 
1445   Register Add2 = MI.getOperand(1).getReg();
1446   Register Imm1 = MI.getOperand(2).getReg();
1447   auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1448   if (!MaybeImmVal)
1449     return false;
1450 
1451   MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
1452   if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1453     return false;
1454 
1455   Register Base = Add2Def->getOperand(1).getReg();
1456   Register Imm2 = Add2Def->getOperand(2).getReg();
1457   auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1458   if (!MaybeImm2Val)
1459     return false;
1460 
1461   // Pass the combined immediate to the apply function.
1462   MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
1463   MatchInfo.Base = Base;
1464   return true;
1465 }
1466 
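// Rewrite the outer G_PTR_ADD in place so that it adds the combined immediate
// directly to the original base pointer.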
1467 bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1468                                            PtrAddChain &MatchInfo) {
1469   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1470   MachineIRBuilder MIB(MI);
1471   LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1472   auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1473   Observer.changingInstr(MI);
1474   MI.getOperand(1).setReg(MatchInfo.Base);
1475   MI.getOperand(2).setReg(NewOffset.getReg(0));
1476   Observer.changedInstr(MI);
1477   return true;
1478 }
1479 
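// Match a G_MUL whose right-hand operand is a power-of-two constant:
//   (G_MUL x, 2^N) -> (G_SHL x, N)
// ShiftVal is set to N for the apply step.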
1480 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1481                                           unsigned &ShiftVal) {
1482   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1483   auto MaybeImmVal =
1484       getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1485   if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
1486     return false;
1487   ShiftVal = Log2_64(MaybeImmVal->Value);
1488   return true;
1489 }
1490 
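// Mutate the matched G_MUL in place into a G_SHL by the computed shift amount.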
1491 bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1492                                           unsigned &ShiftVal) {
1493   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1494   MachineIRBuilder MIB(MI);
1495   LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1496   auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1497   Observer.changingInstr(MI);
1498   MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1499   MI.getOperand(2).setReg(ShiftCst.getReg(0));
1500   Observer.changedInstr(MI);
1501   return true;
1502 }
1503 
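// Match a scalar shift wider than TargetShiftSize whose constant shift amount
// is at least half the bit width (and less than the full width); such a shift
// only needs to operate on one half of the value once it has been split with
// G_UNMERGE_VALUES.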
1504 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
1505                                                 unsigned TargetShiftSize,
1506                                                 unsigned &ShiftVal) {
1507   assert((MI.getOpcode() == TargetOpcode::G_SHL ||
1508           MI.getOpcode() == TargetOpcode::G_LSHR ||
1509           MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
1510 
1511   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1512   if (Ty.isVector()) // TODO: Handle vector shifts.
1513     return false;
1514 
1515   // Don't narrow further than the requested size.
1516   unsigned Size = Ty.getSizeInBits();
1517   if (Size <= TargetShiftSize)
1518     return false;
1519 
1520   auto MaybeImmVal =
1521     getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1522   if (!MaybeImmVal)
1523     return false;
1524 
1525   ShiftVal = MaybeImmVal->Value;
1526   return ShiftVal >= Size / 2 && ShiftVal < Size;
1527 }
1528 
1529 bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
1530                                                 const unsigned &ShiftVal) {
1531   Register DstReg = MI.getOperand(0).getReg();
1532   Register SrcReg = MI.getOperand(1).getReg();
1533   LLT Ty = MRI.getType(SrcReg);
1534   unsigned Size = Ty.getSizeInBits();
1535   unsigned HalfSize = Size / 2;
1536   assert(ShiftVal >= HalfSize);
1537 
1538   LLT HalfTy = LLT::scalar(HalfSize);
1539 
1540   Builder.setInstr(MI);
1541   auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
1542   unsigned NarrowShiftAmt = ShiftVal - HalfSize;
1543 
1544   if (MI.getOpcode() == TargetOpcode::G_LSHR) {
1545     Register Narrowed = Unmerge.getReg(1);
1546 
1547     //  dst = G_LSHR s64:x, C for C >= 32
1548     // =>
1549     //   lo, hi = G_UNMERGE_VALUES x
1550     //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
1551 
1552     if (NarrowShiftAmt != 0) {
1553       Narrowed = Builder.buildLShr(HalfTy, Narrowed,
1554         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
1555     }
1556 
1557     auto Zero = Builder.buildConstant(HalfTy, 0);
1558     Builder.buildMerge(DstReg, { Narrowed, Zero });
1559   } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
1560     Register Narrowed = Unmerge.getReg(0);
1561     //  dst = G_SHL s64:x, C for C >= 32
1562     // =>
1563     //   lo, hi = G_UNMERGE_VALUES x
1564     //   dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
1565     if (NarrowShiftAmt != 0) {
1566       Narrowed = Builder.buildShl(HalfTy, Narrowed,
1567         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
1568     }
1569 
1570     auto Zero = Builder.buildConstant(HalfTy, 0);
1571     Builder.buildMerge(DstReg, { Zero, Narrowed });
1572   } else {
1573     assert(MI.getOpcode() == TargetOpcode::G_ASHR);
1574     auto Hi = Builder.buildAShr(
1575       HalfTy, Unmerge.getReg(1),
1576       Builder.buildConstant(HalfTy, HalfSize - 1));
1577 
1578     if (ShiftVal == HalfSize) {
1579       // (G_ASHR s64:x, 32) ->
1580       //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
1581       Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
1582     } else if (ShiftVal == Size - 1) {
1583       // Don't need a second shift.
1584       // (G_ASHR s64:x, 63) ->
1585       //   %narrowed = (G_ASHR hi_32(x), 31)
1586       //   G_MERGE_VALUES %narrowed, %narrowed
1587       Builder.buildMerge(DstReg, { Hi, Hi });
1588     } else {
1589       auto Lo = Builder.buildAShr(
1590         HalfTy, Unmerge.getReg(1),
1591         Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
1592 
1593       // (G_ASHR s64:x, C) -> for C >= 32
1594       //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
1595       Builder.buildMerge(DstReg, { Lo, Hi });
1596     }
1597   }
1598 
1599   MI.eraseFromParent();
1600   return true;
1601 }
1602 
1603 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
1604                                               unsigned TargetShiftAmount) {
1605   unsigned ShiftAmt;
1606   if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
1607     applyCombineShiftToUnmerge(MI, ShiftAmt);
1608     return true;
1609   }
1610 
1611   return false;
1612 }
1613 
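// Fold (G_INTTOPTR (G_PTRTOINT x)) -> x when x already has the type of the
// G_INTTOPTR result; the apply step replaces the result with a copy of x.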
1614 bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
1615   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
1616   Register DstReg = MI.getOperand(0).getReg();
1617   LLT DstTy = MRI.getType(DstReg);
1618   Register SrcReg = MI.getOperand(1).getReg();
1619   return mi_match(SrcReg, MRI,
1620                   m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
1621 }
1622 
1623 bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
1624   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
1625   Register DstReg = MI.getOperand(0).getReg();
1626   Builder.setInstr(MI);
1627   Builder.buildCopy(DstReg, Reg);
1628   MI.eraseFromParent();
1629   return true;
1630 }
1631 
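// Fold (G_PTRTOINT (G_INTTOPTR x)) -> x; the apply step zero-extends or
// truncates x to the width of the G_PTRTOINT result.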
1632 bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
1633   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
1634   Register SrcReg = MI.getOperand(1).getReg();
1635   return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
1636 }
1637 
1638 bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
1639   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
1640   Register DstReg = MI.getOperand(0).getReg();
1641   Builder.setInstr(MI);
1642   Builder.buildZExtOrTrunc(DstReg, Reg);
1643   MI.eraseFromParent();
1644   return true;
1645 }
1646 
1647 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
1648   return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1649     return MO.isReg() &&
1650            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
1651   });
1652 }
1653 
1654 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
1655   return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1656     return !MO.isReg() ||
1657            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
1658   });
1659 }
1660 
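// Match a G_SHUFFLE_VECTOR whose mask elements are all negative (i.e. undef),
// in which case the result can be replaced with an undef value.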
1661 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
1662   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
1663   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
1664   return all_of(Mask, [](int Elt) { return Elt < 0; });
1665 }
1666 
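// Match a G_STORE whose stored value is a G_IMPLICIT_DEF, so the combiner can
// erase the store.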
1667 bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
1668   assert(MI.getOpcode() == TargetOpcode::G_STORE);
1669   return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
1670                       MRI);
1671 }
1672 
1673 bool CombinerHelper::eraseInst(MachineInstr &MI) {
1674   MI.eraseFromParent();
1675   return true;
1676 }
1677 
1678 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
1679                                     const MachineOperand &MOP2) {
1680   if (!MOP1.isReg() || !MOP2.isReg())
1681     return false;
1682   MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
1683   if (!I1)
1684     return false;
1685   MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
1686   if (!I2)
1687     return false;
1688 
1689   // Handle a case like this:
1690   //
1691   // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
1692   //
1693   // Even though %0 and %1 are produced by the same instruction they are not
1694   // the same values.
1695   if (I1 == I2)
1696     return MOP1.getReg() == MOP2.getReg();
1697 
1698   // If we have an instruction which loads or stores, we can't guarantee that
1699   // it is identical.
1700   //
1701   // For example, we may have
1702   //
1703   // %x1 = G_LOAD %addr (load N from @somewhere)
1704   // ...
1705   // call @foo
1706   // ...
1707   // %x2 = G_LOAD %addr (load N from @somewhere)
1708   // ...
1709   // %or = G_OR %x1, %x2
1710   //
1711   // It's possible that @foo will modify whatever lives at the address we're
1712   // loading from. To be safe, let's just assume that all loads and stores
1713   // are different (unless we have something which is guaranteed not to
1714   // change).
1715   if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
1716     return false;
1717 
1718   // Check for physical registers on the instructions first to avoid cases
1719   // like this:
1720   //
1721   // %a = COPY $physreg
1722   // ...
1723   // SOMETHING implicit-def $physreg
1724   // ...
1725   // %b = COPY $physreg
1726   //
1727   // These copies are not equivalent.
1728   if (any_of(I1->uses(), [](const MachineOperand &MO) {
1729         return MO.isReg() && MO.getReg().isPhysical();
1730       })) {
1731     // Check if we have a case like this:
1732     //
1733     // %a = COPY $physreg
1734     // %b = COPY %a
1735     //
1736     // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
1737     // From that, we know that they must have the same value, since they must
1738     // have come from the same COPY.
1739     return I1->isIdenticalTo(*I2);
1740   }
1741 
1742   // We don't have any physical registers, so we don't necessarily need the
1743   // same vreg defs.
1744   //
1745   // On the off-chance that there's some target instruction feeding into the
1746   // instruction, let's use produceSameValue instead of isIdenticalTo.
1747   return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
1748 }
1749 
1750 bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
1751   if (!MOP.isReg())
1752     return false;
1753   // MIPatternMatch doesn't let us look through G_ZEXT etc.
1754   auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
1755   return ValAndVReg && ValAndVReg->Value == C;
1756 }
1757 
1758 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
1759                                                      unsigned OpIdx) {
1760   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
1761   Register OldReg = MI.getOperand(0).getReg();
1762   Register Replacement = MI.getOperand(OpIdx).getReg();
1763   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
1764   MI.eraseFromParent();
1765   replaceRegWith(MRI, OldReg, Replacement);
1766   return true;
1767 }
1768 
1769 bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
1770                                                  Register Replacement) {
1771   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
1772   Register OldReg = MI.getOperand(0).getReg();
1773   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
1774   MI.eraseFromParent();
1775   replaceRegWith(MRI, OldReg, Replacement);
1776   return true;
1777 }
1778 
1779 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
1780   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
1781   // Match (cond ? x : x)
1782   return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
1783          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
1784                        MRI);
1785 }
1786 
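// Match a binary operation whose two source operands are defined by the same
// value, so the result can be replaced by that value (e.g. idempotent ops
// like G_AND and G_OR).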
1787 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
1788   return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
1789          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
1790                        MRI);
1791 }
1792 
1793 bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
1794   return matchConstantOp(MI.getOperand(OpIdx), 0) &&
1795          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
1796                        MRI);
1797 }
1798 
1799 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
1800   assert(MI.getNumDefs() == 1 && "Expected only one def?");
1801   Builder.setInstr(MI);
1802   Builder.buildFConstant(MI.getOperand(0), C);
1803   MI.eraseFromParent();
1804   return true;
1805 }
1806 
1807 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
1808   assert(MI.getNumDefs() == 1 && "Expected only one def?");
1809   Builder.setInstr(MI);
1810   Builder.buildConstant(MI.getOperand(0), C);
1811   MI.eraseFromParent();
1812   return true;
1813 }
1814 
1815 bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
1816   assert(MI.getNumDefs() == 1 && "Expected only one def?");
1817   Builder.setInstr(MI);
1818   Builder.buildUndef(MI.getOperand(0));
1819   MI.eraseFromParent();
1820   return true;
1821 }
1822 
1823 bool CombinerHelper::matchSimplifyAddToSub(
1824     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
1825   Register LHS = MI.getOperand(1).getReg();
1826   Register RHS = MI.getOperand(2).getReg();
1827   Register &NewLHS = std::get<0>(MatchInfo);
1828   Register &NewRHS = std::get<1>(MatchInfo);
1829 
1830   // Helper lambda to check for opportunities for
1831   // ((0-A) + B) -> B - A
1832   // (A + (0-B)) -> A - B
1833   auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
1834     int64_t Cst;
1835     if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
1836         Cst != 0)
1837       return false;
1838     NewLHS = MaybeNewLHS;
1839     return true;
1840   };
1841 
1842   return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
1843 }
1844 
1845 bool CombinerHelper::applySimplifyAddToSub(
1846     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
1847   Builder.setInstr(MI);
1848   Register SubLHS, SubRHS;
1849   std::tie(SubLHS, SubRHS) = MatchInfo;
1850   Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
1851   MI.eraseFromParent();
1852   return true;
1853 }
1854 
1855 bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
1856     MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
1857   // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
1858   //
1859   // Creates the new hand and logic instructions (but does not insert them).
1860   //
1861   // On success, MatchInfo is populated with the new instructions. These are
1862   // inserted in applyHoistLogicOpWithSameOpcodeHands.
1863   unsigned LogicOpcode = MI.getOpcode();
1864   assert(LogicOpcode == TargetOpcode::G_AND ||
1865          LogicOpcode == TargetOpcode::G_OR ||
1866          LogicOpcode == TargetOpcode::G_XOR);
1867   MachineIRBuilder MIB(MI);
1868   Register Dst = MI.getOperand(0).getReg();
1869   Register LHSReg = MI.getOperand(1).getReg();
1870   Register RHSReg = MI.getOperand(2).getReg();
1871 
1872   // Don't recompute anything.
1873   if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
1874     return false;
1875 
1876   // Make sure we have (hand x, ...), (hand y, ...)
1877   MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
1878   MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
1879   if (!LeftHandInst || !RightHandInst)
1880     return false;
1881   unsigned HandOpcode = LeftHandInst->getOpcode();
1882   if (HandOpcode != RightHandInst->getOpcode())
1883     return false;
1884   if (!LeftHandInst->getOperand(1).isReg() ||
1885       !RightHandInst->getOperand(1).isReg())
1886     return false;
1887 
1888   // Make sure the types match up, and if we're doing this post-legalization,
1889   // we end up with legal types.
1890   Register X = LeftHandInst->getOperand(1).getReg();
1891   Register Y = RightHandInst->getOperand(1).getReg();
1892   LLT XTy = MRI.getType(X);
1893   LLT YTy = MRI.getType(Y);
1894   if (XTy != YTy)
1895     return false;
1896   if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
1897     return false;
1898 
1899   // Optional extra source register.
1900   Register ExtraHandOpSrcReg;
1901   switch (HandOpcode) {
1902   default:
1903     return false;
1904   case TargetOpcode::G_ANYEXT:
1905   case TargetOpcode::G_SEXT:
1906   case TargetOpcode::G_ZEXT: {
1907     // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
1908     break;
1909   }
1910   case TargetOpcode::G_AND:
1911   case TargetOpcode::G_ASHR:
1912   case TargetOpcode::G_LSHR:
1913   case TargetOpcode::G_SHL: {
1914     // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
1915     MachineOperand &ZOp = LeftHandInst->getOperand(2);
1916     if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
1917       return false;
1918     ExtraHandOpSrcReg = ZOp.getReg();
1919     break;
1920   }
1921   }
1922 
1923   // Record the steps to build the new instructions.
1924   //
1925   // Steps to build (logic x, y)
1926   auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
1927   OperandBuildSteps LogicBuildSteps = {
1928       [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
1929       [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
1930       [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
1931   InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
1932 
1933   // Steps to build hand (logic x, y), ...z
1934   OperandBuildSteps HandBuildSteps = {
1935       [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
1936       [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
1937   if (ExtraHandOpSrcReg.isValid())
1938     HandBuildSteps.push_back(
1939         [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
1940   InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
1941 
1942   MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
1943   return true;
1944 }
1945 
1946 bool CombinerHelper::applyBuildInstructionSteps(
1947     MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
1948   assert(MatchInfo.InstrsToBuild.size() &&
1949          "Expected at least one instr to build?");
1950   Builder.setInstr(MI);
1951   for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
1952     assert(InstrToBuild.Opcode && "Expected a valid opcode?");
1953     assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
1954     MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
1955     for (auto &OperandFn : InstrToBuild.OperandFns)
1956       OperandFn(Instr);
1957   }
1958   MI.eraseFromParent();
1959   return true;
1960 }
1961 
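// Fold (G_ASHR (G_SHL x, C), C) -> (G_SEXT_INREG x, Width - C), provided
// G_SEXT_INREG is legal (or we have not yet run the legalizer).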
1962 bool CombinerHelper::matchAshrShlToSextInreg(
1963     MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
1964   assert(MI.getOpcode() == TargetOpcode::G_ASHR);
1965   int64_t ShlCst, AshrCst;
1966   Register Src;
1967   // FIXME: detect splat constant vectors.
1968   if (!mi_match(MI.getOperand(0).getReg(), MRI,
1969                 m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
1970     return false;
1971   if (ShlCst != AshrCst)
1972     return false;
1973   if (!isLegalOrBeforeLegalizer(
1974           {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
1975     return false;
1976   MatchInfo = std::make_tuple(Src, ShlCst);
1977   return true;
1978 }
1979 bool CombinerHelper::applyAshShlToSextInreg(
1980     MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
1981   assert(MI.getOpcode() == TargetOpcode::G_ASHR);
1982   Register Src;
1983   int64_t ShiftAmt;
1984   std::tie(Src, ShiftAmt) = MatchInfo;
1985   unsigned Size = MRI.getType(Src).getScalarSizeInBits();
1986   Builder.setInstrAndDebugLoc(MI);
1987   Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
1988   MI.eraseFromParent();
1989   return true;
1990 }
1991 
1992 bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI,
1993                                              Register &Replacement) {
1994   // Given
1995   //
1996   // %mask:_(sN) = G_CONSTANT iN 000...0111...1
1997   // %x:_(sN) = G_SOMETHING
1998   // %y:_(sN) = G_AND %x, %mask
1999   //
2000   // Eliminate the G_AND when it is known that x & mask == x.
2001   //
2002   // Patterns like this can appear as a result of legalization. E.g.
2003   //
2004   // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
2005   // %one:_(s32) = G_CONSTANT i32 1
2006   // %and:_(s32) = G_AND %cmp, %one
2007   //
2008   // In this case, G_ICMP only produces a single bit, so x & 1 == x.
2009   assert(MI.getOpcode() == TargetOpcode::G_AND);
2010   if (!KB)
2011     return false;
2012 
2013   // Replacement = %x, AndDst = %y. Check that we can replace AndDst with the
2014   // LHS of the G_AND.
2015   Replacement = MI.getOperand(1).getReg();
2016   Register AndDst = MI.getOperand(0).getReg();
2017   LLT DstTy = MRI.getType(AndDst);
2018 
2019   // FIXME: This should be removed once GISelKnownBits supports vectors.
2020   if (DstTy.isVector())
2021     return false;
2022   if (!canReplaceReg(AndDst, Replacement, MRI))
2023     return false;
2024 
2025   // Check that we have a constant on the RHS of the G_AND, which is of the
2026   // form 000...0111...1.
2027   int64_t Cst;
2028   if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Cst)))
2029     return false;
2030   APInt Mask(DstTy.getSizeInBits(), Cst);
2031   if (!Mask.isMask())
2032     return false;
2033 
2034   // Now, let's check that x & Mask == x. If this is true, then x & ~Mask == 0.
2035   return KB->maskedValueIsZero(Replacement, ~Mask);
2036 }
2037 
2038 bool CombinerHelper::tryCombine(MachineInstr &MI) {
2039   if (tryCombineCopy(MI))
2040     return true;
2041   if (tryCombineExtendingLoads(MI))
2042     return true;
2043   if (tryCombineIndexedLoadStore(MI))
2044     return true;
2045   return false;
2046 }
2047