//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/CodeGen/GlobalISel/Combiner.h"
10 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
12 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
13 #include "llvm/CodeGen/GlobalISel/Utils.h"
14 #include "llvm/CodeGen/MachineDominators.h"
15 #include "llvm/CodeGen/MachineFrameInfo.h"
16 #include "llvm/CodeGen/MachineInstr.h"
17 #include "llvm/CodeGen/MachineRegisterInfo.h"
18 #include "llvm/CodeGen/TargetInstrInfo.h"
19 #include "llvm/CodeGen/TargetLowering.h"
20 #include "llvm/Support/MathExtras.h"
21 #include "llvm/Target/TargetMachine.h"
22 
23 #define DEBUG_TYPE "gi-combiner"
24 
25 using namespace llvm;
26 
27 // Option to allow testing of the combiner while no targets know about indexed
28 // addressing.
29 static cl::opt<bool>
30     ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
31                        cl::desc("Force all indexed operations to be "
32                                 "legal for the GlobalISel combiner"));
33 
35 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
36                                MachineIRBuilder &B, GISelKnownBits *KB,
37                                MachineDominatorTree *MDT)
38     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
39       KB(KB), MDT(MDT) {
40   (void)this->KB;
41 }
42 
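// Replace all uses of FromReg with ToReg, telling the observer about every use
// that changes. If the two registers' attributes cannot be constrained to a
// common set, fall back to connecting them with a COPY instead.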
43 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
44                                     Register ToReg) const {
45   Observer.changingAllUsesOfReg(MRI, FromReg);
46 
47   if (MRI.constrainRegAttrs(ToReg, FromReg))
48     MRI.replaceRegWith(FromReg, ToReg);
49   else
50     Builder.buildCopy(ToReg, FromReg);
51 
52   Observer.finishedChangingAllUsesOfReg();
53 }
54 
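// Rewrite a single register operand to use ToReg, notifying the observer that
// the operand's owning instruction is being changed.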
55 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
56                                       MachineOperand &FromRegOp,
57                                       Register ToReg) const {
58   assert(FromRegOp.getParent() && "Expected an operand in an MI");
59   Observer.changingInstr(*FromRegOp.getParent());
60 
61   FromRegOp.setReg(ToReg);
62 
63   Observer.changedInstr(*FromRegOp.getParent());
64 }
65 
66 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
67   if (matchCombineCopy(MI)) {
68     applyCombineCopy(MI);
69     return true;
70   }
71   return false;
72 }
73 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
74   if (MI.getOpcode() != TargetOpcode::COPY)
75     return false;
76   Register DstReg = MI.getOperand(0).getReg();
77   Register SrcReg = MI.getOperand(1).getReg();
78   return canReplaceReg(DstReg, SrcReg, MRI);
79 }
80 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
81   Register DstReg = MI.getOperand(0).getReg();
82   Register SrcReg = MI.getOperand(1).getReg();
83   MI.eraseFromParent();
84   replaceRegWith(MRI, DstReg, SrcReg);
85 }
86 
87 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
88   bool IsUndef = false;
89   SmallVector<Register, 4> Ops;
90   if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
91     applyCombineConcatVectors(MI, IsUndef, Ops);
92     return true;
93   }
94   return false;
95 }
96 
97 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
98                                                SmallVectorImpl<Register> &Ops) {
99   assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
100          "Invalid instruction");
101   IsUndef = true;
102   MachineInstr *Undef = nullptr;
103 
104   // Walk over all the operands of concat vectors and check if they are
105   // build_vector themselves or undef.
106   // Then collect their operands in Ops.
107   for (const MachineOperand &MO : MI.uses()) {
108     Register Reg = MO.getReg();
109     MachineInstr *Def = MRI.getVRegDef(Reg);
110     assert(Def && "Operand not defined");
111     switch (Def->getOpcode()) {
112     case TargetOpcode::G_BUILD_VECTOR:
113       IsUndef = false;
114       // Remember the operands of the build_vector to fold
115       // them into the yet-to-build flattened concat vectors.
116       for (const MachineOperand &BuildVecMO : Def->uses())
117         Ops.push_back(BuildVecMO.getReg());
118       break;
119     case TargetOpcode::G_IMPLICIT_DEF: {
120       LLT OpType = MRI.getType(Reg);
121       // Keep one undef value for all the undef operands.
122       if (!Undef) {
123         Builder.setInsertPt(*MI.getParent(), MI);
124         Undef = Builder.buildUndef(OpType.getScalarType());
125       }
126       assert(MRI.getType(Undef->getOperand(0).getReg()) ==
127                  OpType.getScalarType() &&
128              "All undefs should have the same type");
129       // Break the undef vector in as many scalar elements as needed
130       // for the flattening.
131       for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
132            EltIdx != EltEnd; ++EltIdx)
133         Ops.push_back(Undef->getOperand(0).getReg());
134       break;
135     }
136     default:
137       return false;
138     }
139   }
140   return true;
141 }
142 void CombinerHelper::applyCombineConcatVectors(
143     MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
145   // Generate the flattened build_vector.
146   Register DstReg = MI.getOperand(0).getReg();
147   Builder.setInsertPt(*MI.getParent(), MI);
148   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
149 
  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef. Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up. For now, given we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // right thing.
156   if (IsUndef)
157     Builder.buildUndef(NewDstReg);
158   else
159     Builder.buildBuildVector(NewDstReg, Ops);
160   MI.eraseFromParent();
161   replaceRegWith(MRI, DstReg, NewDstReg);
162 }
163 
164 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
165   SmallVector<Register, 4> Ops;
166   if (matchCombineShuffleVector(MI, Ops)) {
167     applyCombineShuffleVector(MI, Ops);
168     return true;
169   }
170   return false;
171 }
172 
173 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
174                                                SmallVectorImpl<Register> &Ops) {
175   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
176          "Invalid instruction kind");
177   LLT DstType = MRI.getType(MI.getOperand(0).getReg());
178   Register Src1 = MI.getOperand(1).getReg();
179   LLT SrcType = MRI.getType(Src1);
180   // As bizarre as it may look, shuffle vector can actually produce
181   // scalar! This is because at the IR level a <1 x ty> shuffle
182   // vector is perfectly valid.
183   unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
184   unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
185 
186   // If the resulting vector is smaller than the size of the source
187   // vectors being concatenated, we won't be able to replace the
  // shuffle vector with a concat_vectors.
189   //
190   // Note: We may still be able to produce a concat_vectors fed by
191   //       extract_vector_elt and so on. It is less clear that would
192   //       be better though, so don't bother for now.
193   //
  // If the destination is a scalar, the size of the sources doesn't
  // matter; we will lower the shuffle to a plain copy. That only
  // works if the source and destination have the same size, but
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match,
  //       we could still emit an extract vector element in that case.
201   if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
202     return false;
203 
204   // Check that the shuffle mask can be broken evenly between the
205   // different sources.
206   if (DstNumElts % SrcNumElts != 0)
207     return false;
208 
209   // Mask length is a multiple of the source vector length.
210   // Check if the shuffle is some kind of concatenation of the input
211   // vectors.
212   unsigned NumConcat = DstNumElts / SrcNumElts;
213   SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
214   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
215   for (unsigned i = 0; i != DstNumElts; ++i) {
216     int Idx = Mask[i];
217     // Undef value.
218     if (Idx < 0)
219       continue;
220     // Ensure the indices in each SrcType sized piece are sequential and that
221     // the same source is used for the whole piece.
222     if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
223         (ConcatSrcs[i / SrcNumElts] >= 0 &&
224          ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
225       return false;
226     // Remember which source this index came from.
227     ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
228   }
229 
230   // The shuffle is concatenating multiple vectors together.
231   // Collect the different operands for that.
232   Register UndefReg;
233   Register Src2 = MI.getOperand(2).getReg();
234   for (auto Src : ConcatSrcs) {
235     if (Src < 0) {
236       if (!UndefReg) {
237         Builder.setInsertPt(*MI.getParent(), MI);
238         UndefReg = Builder.buildUndef(SrcType).getReg(0);
239       }
240       Ops.push_back(UndefReg);
241     } else if (Src == 0)
242       Ops.push_back(Src1);
243     else
244       Ops.push_back(Src2);
245   }
246   return true;
247 }
248 
249 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
250                                                const ArrayRef<Register> Ops) {
251   Register DstReg = MI.getOperand(0).getReg();
252   Builder.setInsertPt(*MI.getParent(), MI);
253   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
254 
255   if (Ops.size() == 1)
256     Builder.buildCopy(NewDstReg, Ops[0]);
257   else
258     Builder.buildMerge(NewDstReg, Ops);
259 
260   MI.eraseFromParent();
261   replaceRegWith(MRI, DstReg, NewDstReg);
262 }
263 
264 namespace {
265 
266 /// Select a preference between two uses. CurrentUse is the current preference
/// while the *ForCandidate arguments describe the candidate under
/// consideration.
268 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
269                                   const LLT TyForCandidate,
270                                   unsigned OpcodeForCandidate,
271                                   MachineInstr *MIForCandidate) {
272   if (!CurrentUse.Ty.isValid()) {
273     if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
274         CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
275       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
276     return CurrentUse;
277   }
278 
  // We permit the extend to be hoisted through basic blocks, but this is only
  // sensible if the target has extending loads. If the legalizer later has to
  // lower the result back into a load and an extend, the net effect is merely
  // hoisting the extend up to the load.
283 
284   // Prefer defined extensions to undefined extensions as these are more
285   // likely to reduce the number of instructions.
286   if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
287       CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
288     return CurrentUse;
289   else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
290            OpcodeForCandidate != TargetOpcode::G_ANYEXT)
291     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
292 
  // Prefer sign extensions to zero extensions, as sign extensions tend to be
  // more expensive as standalone instructions, so folding them into the load
  // is likely the bigger win.
295   if (CurrentUse.Ty == TyForCandidate) {
296     if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
297         OpcodeForCandidate == TargetOpcode::G_ZEXT)
298       return CurrentUse;
299     else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
300              OpcodeForCandidate == TargetOpcode::G_SEXT)
301       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
302   }
303 
304   // This is potentially target specific. We've chosen the largest type
305   // because G_TRUNC is usually free. One potential catch with this is that
306   // some targets have a reduced number of larger registers than smaller
307   // registers and this choice potentially increases the live-range for the
308   // larger value.
309   if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
310     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
311   }
312   return CurrentUse;
313 }
314 
315 /// Find a suitable place to insert some instructions and insert them. This
316 /// function accounts for special cases like inserting before a PHI node.
317 /// The current strategy for inserting before PHI's is to duplicate the
318 /// instructions for each predecessor. However, while that's ok for G_TRUNC
319 /// on most targets since it generally requires no code, other targets/cases may
320 /// want to try harder to find a dominating block.
321 static void InsertInsnsWithoutSideEffectsBeforeUse(
322     MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
323     std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
324                        MachineOperand &UseMO)>
325         Inserter) {
326   MachineInstr &UseMI = *UseMO.getParent();
327 
328   MachineBasicBlock *InsertBB = UseMI.getParent();
329 
330   // If the use is a PHI then we want the predecessor block instead.
331   if (UseMI.isPHI()) {
332     MachineOperand *PredBB = std::next(&UseMO);
333     InsertBB = PredBB->getMBB();
334   }
335 
336   // If the block is the same block as the def then we want to insert just after
337   // the def instead of at the start of the block.
338   if (InsertBB == DefMI.getParent()) {
339     MachineBasicBlock::iterator InsertPt = &DefMI;
340     Inserter(InsertBB, std::next(InsertPt), UseMO);
341     return;
342   }
343 
344   // Otherwise we want the start of the BB
345   Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
346 }
347 } // end anonymous namespace
348 
349 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
350   PreferredTuple Preferred;
351   if (matchCombineExtendingLoads(MI, Preferred)) {
352     applyCombineExtendingLoads(MI, Preferred);
353     return true;
354   }
355   return false;
356 }
357 
358 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
359                                                 PreferredTuple &Preferred) {
360   // We match the loads and follow the uses to the extend instead of matching
361   // the extends and following the def to the load. This is because the load
362   // must remain in the same position for correctness (unless we also add code
363   // to find a safe place to sink it) whereas the extend is freely movable.
  // It also prevents us from duplicating the load, which would be incorrect
  // for volatile accesses and wasteful in general.
366 
367   if (MI.getOpcode() != TargetOpcode::G_LOAD &&
368       MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
369       MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
370     return false;
371 
372   auto &LoadValue = MI.getOperand(0);
373   assert(LoadValue.isReg() && "Result wasn't a register?");
374 
375   LLT LoadValueTy = MRI.getType(LoadValue.getReg());
376   if (!LoadValueTy.isScalar())
377     return false;
378 
379   // Most architectures are going to legalize <s8 loads into at least a 1 byte
380   // load, and the MMOs can only describe memory accesses in multiples of bytes.
381   // If we try to perform extload combining on those, we can end up with
382   // %a(s8) = extload %ptr (load 1 byte from %ptr)
383   // ... which is an illegal extload instruction.
384   if (LoadValueTy.getSizeInBits() < 8)
385     return false;
386 
  // Non-power-of-2 types will very likely be legalized into multiple loads,
  // so don't bother trying to match them into extending loads.
389   if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
390     return false;
391 
392   // Find the preferred type aside from the any-extends (unless it's the only
393   // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
395   // relative type sizes. At the same time, pick an extend to use based on the
396   // extend involved in the chosen type.
397   unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
398                                  ? TargetOpcode::G_ANYEXT
399                                  : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
400                                        ? TargetOpcode::G_SEXT
401                                        : TargetOpcode::G_ZEXT;
402   Preferred = {LLT(), PreferredOpcode, nullptr};
403   for (auto &UseMI : MRI.use_instructions(LoadValue.getReg())) {
404     if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
405         UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
406         UseMI.getOpcode() == TargetOpcode::G_ANYEXT) {
407       Preferred = ChoosePreferredUse(Preferred,
408                                      MRI.getType(UseMI.getOperand(0).getReg()),
409                                      UseMI.getOpcode(), &UseMI);
410     }
411   }
412 
413   // There were no extends
414   if (!Preferred.MI)
415     return false;
  // It should be impossible to choose an extend without selecting a different
417   // type since by definition the result of an extend is larger.
418   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
419 
420   LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
421   return true;
422 }
423 
424 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
425                                                 PreferredTuple &Preferred) {
426   // Rewrite the load to the chosen extending load.
427   Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
428 
429   // Inserter to insert a truncate back to the original type at a given point
430   // with some basic CSE to limit truncate duplication to one per BB.
431   DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
432   auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
433                            MachineBasicBlock::iterator InsertBefore,
434                            MachineOperand &UseMO) {
435     MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
436     if (PreviouslyEmitted) {
437       Observer.changingInstr(*UseMO.getParent());
438       UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
439       Observer.changedInstr(*UseMO.getParent());
440       return;
441     }
442 
443     Builder.setInsertPt(*InsertIntoBB, InsertBefore);
444     Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
445     MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
446     EmittedInsns[InsertIntoBB] = NewMI;
447     replaceRegOpWith(MRI, UseMO, NewDstReg);
448   };
449 
450   Observer.changingInstr(MI);
451   MI.setDesc(
452       Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
453                                ? TargetOpcode::G_SEXTLOAD
454                                : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
455                                      ? TargetOpcode::G_ZEXTLOAD
456                                      : TargetOpcode::G_LOAD));
457 
458   // Rewrite all the uses to fix up the types.
459   auto &LoadValue = MI.getOperand(0);
460   SmallVector<MachineOperand *, 4> Uses;
461   for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
462     Uses.push_back(&UseMO);
463 
464   for (auto *UseMO : Uses) {
465     MachineInstr *UseMI = UseMO->getParent();
466 
467     // If the extend is compatible with the preferred extend then we should fix
468     // up the type and extend so that it uses the preferred use.
469     if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
470         UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
471       Register UseDstReg = UseMI->getOperand(0).getReg();
472       MachineOperand &UseSrcMO = UseMI->getOperand(1);
473       const LLT UseDstTy = MRI.getType(UseDstReg);
474       if (UseDstReg != ChosenDstReg) {
475         if (Preferred.Ty == UseDstTy) {
476           // If the use has the same type as the preferred use, then merge
477           // the vregs and erase the extend. For example:
478           //    %1:_(s8) = G_LOAD ...
479           //    %2:_(s32) = G_SEXT %1(s8)
480           //    %3:_(s32) = G_ANYEXT %1(s8)
481           //    ... = ... %3(s32)
482           // rewrites to:
483           //    %2:_(s32) = G_SEXTLOAD ...
484           //    ... = ... %2(s32)
485           replaceRegWith(MRI, UseDstReg, ChosenDstReg);
486           Observer.erasingInstr(*UseMO->getParent());
487           UseMO->getParent()->eraseFromParent();
488         } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
489           // If the preferred size is smaller, then keep the extend but extend
490           // from the result of the extending load. For example:
491           //    %1:_(s8) = G_LOAD ...
492           //    %2:_(s32) = G_SEXT %1(s8)
493           //    %3:_(s64) = G_ANYEXT %1(s8)
494           //    ... = ... %3(s64)
          // rewrites to:
496           //    %2:_(s32) = G_SEXTLOAD ...
497           //    %3:_(s64) = G_ANYEXT %2:_(s32)
498           //    ... = ... %3(s64)
499           replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
500         } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          //    ... = ... %3(s32)
512           InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
513                                                  InsertTruncAt);
514         }
515         continue;
516       }
517       // The use is (one of) the uses of the preferred use we chose earlier.
518       // We're going to update the load to def this value later so just erase
519       // the old extend.
520       Observer.erasingInstr(*UseMO->getParent());
521       UseMO->getParent()->eraseFromParent();
522       continue;
523     }
524 
525     // The use isn't an extend. Truncate back to the type we originally loaded.
526     // This is free on many targets.
527     InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
528   }
529 
530   MI.getOperand(0).setReg(ChosenDstReg);
531   Observer.changedInstr(MI);
532 }
533 
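// Returns true if DefMI comes before UseMI in their common basic block. Walks
// the block from the beginning and reports which of the two is found first.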
534 bool CombinerHelper::isPredecessor(MachineInstr &DefMI, MachineInstr &UseMI) {
535   assert(DefMI.getParent() == UseMI.getParent());
536   if (&DefMI == &UseMI)
537     return false;
538 
539   // Loop through the basic block until we find one of the instructions.
540   MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
  for (; &*I != &DefMI && &*I != &UseMI; ++I)
    ;
  // Whichever of the two we reach first precedes the other in the block.
  return &*I == &DefMI;
545 }
546 
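// Returns true if DefMI dominates UseMI. Uses the dominator tree if one was
// provided; otherwise only same-block instructions are compared and anything
// cross-block is conservatively treated as not dominating.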
547 bool CombinerHelper::dominates(MachineInstr &DefMI, MachineInstr &UseMI) {
548   if (MDT)
549     return MDT->dominates(&DefMI, &UseMI);
550   else if (DefMI.getParent() != UseMI.getParent())
551     return false;
552 
553   return isPredecessor(DefMI, UseMI);
554 }
555 
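// Look for a G_PTR_ADD user of the mem-op's base pointer that could become the
// write-back of a post-indexed access, i.e. roughly:
//   %val:_(s32) = G_LOAD %base(p0) ...
//   %next:_(p0) = G_PTR_ADD %base, %offset
// The offset must be available before the mem-op, and the mem-op must dominate
// all users of the newly written address for the rewrite to be safe.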
556 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
557                                             Register &Base, Register &Offset) {
558   auto &MF = *MI.getParent()->getParent();
559   const auto &TLI = *MF.getSubtarget().getTargetLowering();
560 
561 #ifndef NDEBUG
562   unsigned Opcode = MI.getOpcode();
563   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
564          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
565 #endif
566 
567   Base = MI.getOperand(1).getReg();
568   MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
569   if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
570     return false;
571 
572   LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
573 
574   for (auto &Use : MRI.use_instructions(Base)) {
575     if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
576       continue;
577 
578     Offset = Use.getOperand(2).getReg();
579     if (!ForceLegalIndexing &&
580         !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
581       LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
582                         << Use);
583       continue;
584     }
585 
586     // Make sure the offset calculation is before the potentially indexed op.
587     // FIXME: we really care about dependency here. The offset calculation might
588     // be movable.
589     MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
590     if (!OffsetDef || !dominates(*OffsetDef, MI)) {
591       LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
592                         << Use);
593       continue;
594     }
595 
596     // FIXME: check whether all uses of Base are load/store with foldable
597     // addressing modes. If so, using the normal addr-modes is better than
598     // forming an indexed one.
599 
600     bool MemOpDominatesAddrUses = true;
601     for (auto &PtrAddUse : MRI.use_instructions(Use.getOperand(0).getReg())) {
602       if (!dominates(MI, PtrAddUse)) {
603         MemOpDominatesAddrUses = false;
604         break;
605       }
606     }
607 
608     if (!MemOpDominatesAddrUses) {
609       LLVM_DEBUG(
610           dbgs() << "    Ignoring candidate as memop does not dominate uses: "
611                  << Use);
612       continue;
613     }
614 
615     LLVM_DEBUG(dbgs() << "    Found match: " << Use);
616     Addr = Use.getOperand(0).getReg();
617     return true;
618   }
619 
620   return false;
621 }
622 
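// Check whether the mem-op's address is a G_PTR_ADD whose add could be folded
// into a pre-indexed access. The addressing mode must be legal, the base must
// not come from a frame index, and the mem-op must dominate all other users of
// the address so they can use the written-back value instead.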
623 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
624                                            Register &Base, Register &Offset) {
625   auto &MF = *MI.getParent()->getParent();
626   const auto &TLI = *MF.getSubtarget().getTargetLowering();
627 
628 #ifndef NDEBUG
629   unsigned Opcode = MI.getOpcode();
630   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
631          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
632 #endif
633 
634   Addr = MI.getOperand(1).getReg();
635   MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
636   if (!AddrDef || MRI.hasOneUse(Addr))
637     return false;
638 
639   Base = AddrDef->getOperand(1).getReg();
640   Offset = AddrDef->getOperand(2).getReg();
641 
642   LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
643 
644   if (!ForceLegalIndexing &&
645       !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
646     LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
647     return false;
648   }
649 
650   MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
651   if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
652     LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
653     return false;
654   }
655 
656   if (MI.getOpcode() == TargetOpcode::G_STORE) {
657     // Would require a copy.
658     if (Base == MI.getOperand(0).getReg()) {
659       LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
660       return false;
661     }
662 
663     // We're expecting one use of Addr in MI, but it could also be the
664     // value stored, which isn't actually dominated by the instruction.
665     if (MI.getOperand(0).getReg() == Addr) {
666       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
667       return false;
668     }
669   }
670 
671   // FIXME: check whether all uses of the base pointer are constant PtrAdds.
672   // That might allow us to end base's liveness here by adjusting the constant.
673 
674   for (auto &UseMI : MRI.use_instructions(Addr)) {
675     if (!dominates(MI, UseMI)) {
676       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
677       return false;
678     }
679   }
680 
681   return true;
682 }
683 
684 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
685   IndexedLoadStoreMatchInfo MatchInfo;
686   if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
687     applyCombineIndexedLoadStore(MI, MatchInfo);
688     return true;
689   }
690   return false;
691 }
692 
bool CombinerHelper::matchCombineIndexedLoadStore(
    MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
694   unsigned Opcode = MI.getOpcode();
695   if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
696       Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
697     return false;
698 
699   MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
700                                           MatchInfo.Offset);
701   if (!MatchInfo.IsPre &&
702       !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
703                               MatchInfo.Offset))
704     return false;
705 
706   return true;
707 }
708 
709 void CombinerHelper::applyCombineIndexedLoadStore(
710     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
711   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
712   MachineIRBuilder MIRBuilder(MI);
713   unsigned Opcode = MI.getOpcode();
714   bool IsStore = Opcode == TargetOpcode::G_STORE;
715   unsigned NewOpcode;
716   switch (Opcode) {
717   case TargetOpcode::G_LOAD:
718     NewOpcode = TargetOpcode::G_INDEXED_LOAD;
719     break;
720   case TargetOpcode::G_SEXTLOAD:
721     NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
722     break;
723   case TargetOpcode::G_ZEXTLOAD:
724     NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
725     break;
726   case TargetOpcode::G_STORE:
727     NewOpcode = TargetOpcode::G_INDEXED_STORE;
728     break;
729   default:
730     llvm_unreachable("Unknown load/store opcode");
731   }
732 
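  // Build the indexed variant. As constructed below, the indexed loads define
  // (value, written-back address) while the indexed store defines only the
  // written-back address; both then take the base, the offset and an immediate
  // that is 1 for pre-indexing and 0 for post-indexing.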
733   auto MIB = MIRBuilder.buildInstr(NewOpcode);
734   if (IsStore) {
735     MIB.addDef(MatchInfo.Addr);
736     MIB.addUse(MI.getOperand(0).getReg());
737   } else {
738     MIB.addDef(MI.getOperand(0).getReg());
739     MIB.addDef(MatchInfo.Addr);
740   }
741 
742   MIB.addUse(MatchInfo.Base);
743   MIB.addUse(MatchInfo.Offset);
744   MIB.addImm(MatchInfo.IsPre);
745   MI.eraseFromParent();
746   AddrDef.eraseFromParent();
747 
  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
749 }
750 
751 bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
752   if (MI.getOpcode() != TargetOpcode::G_BR)
753     return false;
754 
755   // Try to match the following:
756   // bb1:
757   //   %c(s32) = G_ICMP pred, %a, %b
758   //   %c1(s1) = G_TRUNC %c(s32)
759   //   G_BRCOND %c1, %bb2
760   //   G_BR %bb3
761   // bb2:
762   // ...
763   // bb3:
764 
  // The above pattern does not have a fall through to the successor bb2,
  // always resulting in a branch no matter which path is taken. Here we try
  // to find and replace that pattern with a conditional branch to bb3,
  // falling through to bb2 otherwise.
769 
770   MachineBasicBlock *MBB = MI.getParent();
771   MachineBasicBlock::iterator BrIt(MI);
772   if (BrIt == MBB->begin())
773     return false;
774   assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
775 
776   MachineInstr *BrCond = &*std::prev(BrIt);
777   if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
778     return false;
779 
780   // Check that the next block is the conditional branch target.
781   if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
782     return false;
783 
784   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
785   if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
786       !MRI.hasOneUse(CmpMI->getOperand(0).getReg()))
787     return false;
788   return true;
789 }
790 
791 bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
792   if (!matchElideBrByInvertingCond(MI))
793     return false;
794   applyElideBrByInvertingCond(MI);
795   return true;
796 }
797 
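// Invert the G_ICMP feeding the G_BRCOND, retarget the G_BRCOND at the G_BR's
// destination, and erase the now-redundant G_BR so that control falls through
// to the old conditional target.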
798 void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
799   MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
800   MachineBasicBlock::iterator BrIt(MI);
801   MachineInstr *BrCond = &*std::prev(BrIt);
802   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
803 
804   CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
805       (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
806 
807   // Invert the G_ICMP condition.
808   Observer.changingInstr(*CmpMI);
809   CmpMI->getOperand(1).setPredicate(InversePred);
810   Observer.changedInstr(*CmpMI);
811 
812   // Change the conditional branch target.
813   Observer.changingInstr(*BrCond);
814   BrCond->getOperand(1).setMBB(BrTarget);
815   Observer.changedInstr(*BrCond);
816   MI.eraseFromParent();
817 }
818 
819 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
820   // On Darwin, -Os means optimize for size without hurting performance, so
821   // only really optimize for size when -Oz (MinSize) is used.
822   if (MF.getTarget().getTargetTriple().isOSDarwin())
823     return MF.getFunction().hasMinSize();
824   return MF.getFunction().hasOptSize();
825 }
826 
827 // Returns a list of types to use for memory op lowering in MemOps. A partial
828 // port of findOptimalMemOpLowering in TargetLowering.
829 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
830                                           unsigned Limit, const MemOp &Op,
831                                           unsigned DstAS, unsigned SrcAS,
832                                           const AttributeList &FuncAttributes,
833                                           const TargetLowering &TLI) {
834   if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
835     return false;
836 
837   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
838 
839   if (Ty == LLT()) {
840     // Use the largest scalar type whose alignment constraints are satisfied.
841     // We only need to check DstAlign here as SrcAlign is always greater or
842     // equal to DstAlign (or zero).
843     Ty = LLT::scalar(64);
844     if (Op.isFixedDstAlign())
845       while (Op.getDstAlign() < Ty.getSizeInBytes() &&
846              !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS,
847                                                  Op.getDstAlign().value()))
848         Ty = LLT::scalar(Ty.getSizeInBytes());
849     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
850     // FIXME: check for the largest legal type we can load/store to.
851   }
852 
853   unsigned NumMemOps = 0;
854   uint64_t Size = Op.size();
855   while (Size) {
856     unsigned TySize = Ty.getSizeInBytes();
857     while (TySize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
859       LLT NewTy = Ty;
860       // FIXME: check for mem op safety and legality of the types. Not all of
861       // SDAGisms map cleanly to GISel concepts.
862       if (NewTy.isVector())
863         NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
864       NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
865       unsigned NewTySize = NewTy.getSizeInBytes();
866       assert(NewTySize > 0 && "Could not find appropriate type");
867 
868       // If the new LLT cannot cover all of the remaining bits, then consider
869       // issuing a (or a pair of) unaligned and overlapping load / store.
870       bool Fast;
      // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
872       MVT VT = getMVTForLLT(Ty);
873       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
874           TLI.allowsMisalignedMemoryAccesses(
875               VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
876               MachineMemOperand::MONone, &Fast) &&
877           Fast)
878         TySize = Size;
879       else {
880         Ty = NewTy;
881         TySize = NewTySize;
882       }
883     }
884 
885     if (++NumMemOps > Limit)
886       return false;
887 
888     MemOps.push_back(Ty);
889     Size -= TySize;
890   }
891 
892   return true;
893 }
894 
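// Map an LLT onto an equivalent IR type so that DataLayout queries (such as
// the ABI alignment used below) can be answered for it.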
895 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
896   if (Ty.isVector())
897     return VectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
898                            Ty.getNumElements());
899   return IntegerType::get(C, Ty.getSizeInBits());
900 }
901 
902 // Get a vectorized representation of the memset value operand, GISel edition.
903 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
904   MachineRegisterInfo &MRI = *MIB.getMRI();
905   unsigned NumBits = Ty.getScalarSizeInBits();
906   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
907   if (!Ty.isVector() && ValVRegAndVal) {
908     unsigned KnownVal = ValVRegAndVal->Value;
909     APInt Scalar = APInt(8, KnownVal);
910     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
911     return MIB.buildConstant(Ty, SplatVal).getReg(0);
912   }
913   // FIXME: for vector types create a G_BUILD_VECTOR.
914   if (Ty.isVector())
915     return Register();
916 
917   // Extend the byte value to the larger type, and then multiply by a magic
918   // value 0x010101... in order to replicate it across every byte.
919   LLT ExtType = Ty.getScalarType();
920   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
921   if (NumBits > 8) {
922     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
923     auto MagicMI = MIB.buildConstant(ExtType, Magic);
924     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
925   }
926 
927   assert(ExtType == Ty && "Vector memset value type not supported yet");
928   return Val;
929 }
930 
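// Lower a memset with a known constant length into a sequence of stores of a
// splatted value, using the store types suggested by the target.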
931 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
932                                     Register Val, unsigned KnownLen,
933                                     Align Alignment, bool IsVolatile) {
934   auto &MF = *MI.getParent()->getParent();
935   const auto &TLI = *MF.getSubtarget().getTargetLowering();
936   auto &DL = MF.getDataLayout();
937   LLVMContext &C = MF.getFunction().getContext();
938 
939   assert(KnownLen != 0 && "Have a zero length memset length!");
940 
941   bool DstAlignCanChange = false;
942   MachineFrameInfo &MFI = MF.getFrameInfo();
943   bool OptSize = shouldLowerMemFuncForSize(MF);
944 
945   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
946   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
947     DstAlignCanChange = true;
948 
949   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
950   std::vector<LLT> MemOps;
951 
952   const auto &DstMMO = **MI.memoperands_begin();
953   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
954 
955   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
956   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
957 
958   if (!findGISelOptimalMemOpLowering(MemOps, Limit,
959                                      MemOp::Set(KnownLen, DstAlignCanChange,
960                                                 Alignment,
961                                                 /*IsZeroMemset=*/IsZeroVal,
962                                                 /*IsVolatile=*/IsVolatile),
963                                      DstPtrInfo.getAddrSpace(), ~0u,
964                                      MF.getFunction().getAttributes(), TLI))
965     return false;
966 
967   if (DstAlignCanChange) {
968     // Get an estimate of the type from the LLT.
969     Type *IRTy = getTypeForLLT(MemOps[0], C);
970     Align NewAlign = DL.getABITypeAlign(IRTy);
971     if (NewAlign > Alignment) {
972       Alignment = NewAlign;
973       unsigned FI = FIDef->getOperand(1).getIndex();
974       // Give the stack frame object a larger alignment if needed.
975       if (MFI.getObjectAlign(FI) < Alignment)
976         MFI.setObjectAlignment(FI, Alignment);
977     }
978   }
979 
980   MachineIRBuilder MIB(MI);
981   // Find the largest store and generate the bit pattern for it.
982   LLT LargestTy = MemOps[0];
983   for (unsigned i = 1; i < MemOps.size(); i++)
984     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
985       LargestTy = MemOps[i];
986 
987   // The memset stored value is always defined as an s8, so in order to make it
988   // work with larger store types we need to repeat the bit pattern across the
989   // wider type.
990   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
991 
992   if (!MemSetValue)
993     return false;
994 
995   // Generate the stores. For each store type in the list, we generate the
996   // matching store of that type to the destination address.
997   LLT PtrTy = MRI.getType(Dst);
998   unsigned DstOff = 0;
999   unsigned Size = KnownLen;
1000   for (unsigned I = 0; I < MemOps.size(); I++) {
1001     LLT Ty = MemOps[I];
1002     unsigned TySize = Ty.getSizeInBytes();
1003     if (TySize > Size) {
      // Issuing an unaligned store that overlaps with the previous store.
      // Adjust the offset accordingly.
1006       assert(I == MemOps.size() - 1 && I != 0);
1007       DstOff -= TySize - Size;
1008     }
1009 
    // If this store is smaller than the largest store, see whether we can get
1011     // the smaller value for free with a truncate.
1012     Register Value = MemSetValue;
1013     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1014       MVT VT = getMVTForLLT(Ty);
1015       MVT LargestVT = getMVTForLLT(LargestTy);
1016       if (!LargestTy.isVector() && !Ty.isVector() &&
1017           TLI.isTruncateFree(LargestVT, VT))
1018         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1019       else
1020         Value = getMemsetValue(Val, Ty, MIB);
1021       if (!Value)
1022         return false;
1023     }
1024 
1025     auto *StoreMMO =
1026         MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
1027 
1028     Register Ptr = Dst;
1029     if (DstOff != 0) {
1030       auto Offset =
1031           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1032       Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1033     }
1034 
1035     MIB.buildStore(Value, Ptr, *StoreMMO);
1036     DstOff += Ty.getSizeInBytes();
1037     Size -= TySize;
1038   }
1039 
1040   MI.eraseFromParent();
1041   return true;
1042 }
1043 
1044 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1045                                     Register Src, unsigned KnownLen,
1046                                     Align DstAlign, Align SrcAlign,
1047                                     bool IsVolatile) {
1048   auto &MF = *MI.getParent()->getParent();
1049   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1050   auto &DL = MF.getDataLayout();
1051   LLVMContext &C = MF.getFunction().getContext();
1052 
1053   assert(KnownLen != 0 && "Have a zero length memcpy length!");
1054 
1055   bool DstAlignCanChange = false;
1056   MachineFrameInfo &MFI = MF.getFrameInfo();
1057   bool OptSize = shouldLowerMemFuncForSize(MF);
1058   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1059 
1060   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1061   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1062     DstAlignCanChange = true;
1063 
1064   // FIXME: infer better src pointer alignment like SelectionDAG does here.
1065   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1066   // if the memcpy is in a tail call position.
1067 
1068   unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1069   std::vector<LLT> MemOps;
1070 
1071   const auto &DstMMO = **MI.memoperands_begin();
1072   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1073   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1074   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1075 
1076   if (!findGISelOptimalMemOpLowering(
1077           MemOps, Limit,
1078           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1079                       IsVolatile),
1080           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1081           MF.getFunction().getAttributes(), TLI))
1082     return false;
1083 
1084   if (DstAlignCanChange) {
1085     // Get an estimate of the type from the LLT.
1086     Type *IRTy = getTypeForLLT(MemOps[0], C);
1087     Align NewAlign = DL.getABITypeAlign(IRTy);
1088 
1089     // Don't promote to an alignment that would require dynamic stack
1090     // realignment.
1091     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1092     if (!TRI->needsStackRealignment(MF))
1093       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1094         NewAlign = NewAlign / 2;
1095 
1096     if (NewAlign > Alignment) {
1097       Alignment = NewAlign;
1098       unsigned FI = FIDef->getOperand(1).getIndex();
1099       // Give the stack frame object a larger alignment if needed.
1100       if (MFI.getObjectAlign(FI) < Alignment)
1101         MFI.setObjectAlignment(FI, Alignment);
1102     }
1103   }
1104 
1105   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1106 
1107   MachineIRBuilder MIB(MI);
1108   // Now we need to emit a pair of load and stores for each of the types we've
1109   // collected. I.e. for each type, generate a load from the source pointer of
1110   // that type width, and then generate a corresponding store to the dest buffer
  // of that value loaded. This can result in a sequence of loads and stores
  // of mixed types, depending on what the target specifies as good types to
  // use.
1113   unsigned CurrOffset = 0;
1114   LLT PtrTy = MRI.getType(Src);
1115   unsigned Size = KnownLen;
1116   for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
1118     // pair. Adjust the offset accordingly.
1119     if (CopyTy.getSizeInBytes() > Size)
1120       CurrOffset -= CopyTy.getSizeInBytes() - Size;
1121 
1122     // Construct MMOs for the accesses.
1123     auto *LoadMMO =
1124         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1125     auto *StoreMMO =
1126         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1127 
1128     // Create the load.
1129     Register LoadPtr = Src;
1130     Register Offset;
1131     if (CurrOffset != 0) {
1132       Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1133                    .getReg(0);
1134       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1135     }
1136     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1137 
1138     // Create the store.
1139     Register StorePtr =
1140         CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1141     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1142     CurrOffset += CopyTy.getSizeInBytes();
1143     Size -= CopyTy.getSizeInBytes();
1144   }
1145 
1146   MI.eraseFromParent();
1147   return true;
1148 }
1149 
1150 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1151                                      Register Src, unsigned KnownLen,
1152                                      Align DstAlign, Align SrcAlign,
1153                                      bool IsVolatile) {
1154   auto &MF = *MI.getParent()->getParent();
1155   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1156   auto &DL = MF.getDataLayout();
1157   LLVMContext &C = MF.getFunction().getContext();
1158 
1159   assert(KnownLen != 0 && "Have a zero length memmove length!");
1160 
1161   bool DstAlignCanChange = false;
1162   MachineFrameInfo &MFI = MF.getFrameInfo();
1163   bool OptSize = shouldLowerMemFuncForSize(MF);
1164   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1165 
1166   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1167   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1168     DstAlignCanChange = true;
1169 
1170   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1171   std::vector<LLT> MemOps;
1172 
1173   const auto &DstMMO = **MI.memoperands_begin();
1174   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1175   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1176   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1177 
1178   // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now, do the
1180   // same thing here.
1181   if (!findGISelOptimalMemOpLowering(
1182           MemOps, Limit,
1183           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1184                       /*IsVolatile*/ true),
1185           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1186           MF.getFunction().getAttributes(), TLI))
1187     return false;
1188 
1189   if (DstAlignCanChange) {
1190     // Get an estimate of the type from the LLT.
1191     Type *IRTy = getTypeForLLT(MemOps[0], C);
1192     Align NewAlign = DL.getABITypeAlign(IRTy);
1193 
1194     // Don't promote to an alignment that would require dynamic stack
1195     // realignment.
1196     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1197     if (!TRI->needsStackRealignment(MF))
1198       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1199         NewAlign = NewAlign / 2;
1200 
1201     if (NewAlign > Alignment) {
1202       Alignment = NewAlign;
1203       unsigned FI = FIDef->getOperand(1).getIndex();
1204       // Give the stack frame object a larger alignment if needed.
1205       if (MFI.getObjectAlign(FI) < Alignment)
1206         MFI.setObjectAlignment(FI, Alignment);
1207     }
1208   }
1209 
1210   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1211 
1212   MachineIRBuilder MIB(MI);
1213   // Memmove requires that we perform the loads first before issuing the stores.
1214   // Apart from that, this loop is pretty much doing the same thing as the
1215   // memcpy codegen function.
1216   unsigned CurrOffset = 0;
1217   LLT PtrTy = MRI.getType(Src);
1218   SmallVector<Register, 16> LoadVals;
1219   for (auto CopyTy : MemOps) {
1220     // Construct MMO for the load.
1221     auto *LoadMMO =
1222         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1223 
1224     // Create the load.
1225     Register LoadPtr = Src;
1226     if (CurrOffset != 0) {
1227       auto Offset =
1228           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1229       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1230     }
1231     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1232     CurrOffset += CopyTy.getSizeInBytes();
1233   }
1234 
1235   CurrOffset = 0;
1236   for (unsigned I = 0; I < MemOps.size(); ++I) {
1237     LLT CopyTy = MemOps[I];
1238     // Now store the values loaded.
1239     auto *StoreMMO =
1240         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1241 
1242     Register StorePtr = Dst;
1243     if (CurrOffset != 0) {
1244       auto Offset =
1245           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1246       StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1247     }
1248     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1249     CurrOffset += CopyTy.getSizeInBytes();
1250   }
1251   MI.eraseFromParent();
1252   return true;
1253 }
1254 
1255 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1256   // This combine is fairly complex so it's not written with a separate
1257   // matcher function.
1258   assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1259   Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
1260   assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
1261           ID == Intrinsic::memset) &&
1262          "Expected a memcpy like intrinsic");
1263 
1264   auto MMOIt = MI.memoperands_begin();
1265   const MachineMemOperand *MemOp = *MMOIt;
1266   bool IsVolatile = MemOp->isVolatile();
1267   // Don't try to optimize volatile.
1268   if (IsVolatile)
1269     return false;
1270 
1271   Align DstAlign(MemOp->getBaseAlignment());
1272   Align SrcAlign;
1273   Register Dst = MI.getOperand(1).getReg();
1274   Register Src = MI.getOperand(2).getReg();
1275   Register Len = MI.getOperand(3).getReg();
1276 
1277   if (ID != Intrinsic::memset) {
1278     assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1279     MemOp = *(++MMOIt);
1280     SrcAlign = Align(MemOp->getBaseAlignment());
1281   }
1282 
1283   // See if this is a constant length copy
1284   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1285   if (!LenVRegAndVal)
1286     return false; // Leave it to the legalizer to lower it to a libcall.
1287   unsigned KnownLen = LenVRegAndVal->Value;
1288 
1289   if (KnownLen == 0) {
1290     MI.eraseFromParent();
1291     return true;
1292   }
1293 
1294   if (MaxLen && KnownLen > MaxLen)
1295     return false;
1296 
1297   if (ID == Intrinsic::memcpy)
1298     return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1299   if (ID == Intrinsic::memmove)
1300     return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1301   if (ID == Intrinsic::memset)
1302     return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1303   return false;
1304 }
1305 
1306 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1307                                            PtrAddChain &MatchInfo) {
1308   // We're trying to match the following pattern:
1309   //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1310   //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
1311   // -->
1312   //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1313 
1314   if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1315     return false;
1316 
1317   Register Add2 = MI.getOperand(1).getReg();
1318   Register Imm1 = MI.getOperand(2).getReg();
1319   auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1320   if (!MaybeImmVal)
1321     return false;
1322 
1323   MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
1324   if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1325     return false;
1326 
1327   Register Base = Add2Def->getOperand(1).getReg();
1328   Register Imm2 = Add2Def->getOperand(2).getReg();
1329   auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1330   if (!MaybeImm2Val)
1331     return false;
1332 
1333   // Pass the combined immediate to the apply function.
1334   MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
1335   MatchInfo.Base = Base;
1336   return true;
1337 }
1338 
1339 bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1340                                            PtrAddChain &MatchInfo) {
1341   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1342   MachineIRBuilder MIB(MI);
1343   LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1344   auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1345   Observer.changingInstr(MI);
1346   MI.getOperand(1).setReg(MatchInfo.Base);
1347   MI.getOperand(2).setReg(NewOffset.getReg(0));
1348   Observer.changedInstr(MI);
1349   return true;
1350 }
1351 
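// Match a G_MUL by a power-of-two constant and record the equivalent shift
// amount so the multiply can be rewritten as a G_SHL.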
1352 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1353                                           unsigned &ShiftVal) {
1354   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1355   auto MaybeImmVal =
1356       getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1357   if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
1358     return false;
1359   ShiftVal = Log2_64(MaybeImmVal->Value);
1360   return true;
1361 }
1362 
1363 bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1364                                           unsigned &ShiftVal) {
1365   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1366   MachineIRBuilder MIB(MI);
1367   LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1368   auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1369   Observer.changingInstr(MI);
1370   MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1371   MI.getOperand(2).setReg(ShiftCst.getReg(0));
1372   Observer.changedInstr(MI);
1373   return true;
1374 }
1375 
1376 bool CombinerHelper::tryCombine(MachineInstr &MI) {
1377   if (tryCombineCopy(MI))
1378     return true;
1379   if (tryCombineExtendingLoads(MI))
1380     return true;
1381   if (tryCombineIndexedLoadStore(MI))
1382     return true;
1383   return false;
1384 }
1385