1 //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/CodeGen/GlobalISel/Combiner.h"
10 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
11 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
12 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
13 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
14 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
15 #include "llvm/CodeGen/GlobalISel/Utils.h"
16 #include "llvm/CodeGen/MachineDominators.h"
17 #include "llvm/CodeGen/MachineFrameInfo.h"
18 #include "llvm/CodeGen/MachineInstr.h"
19 #include "llvm/CodeGen/MachineRegisterInfo.h"
20 #include "llvm/CodeGen/TargetInstrInfo.h"
21 #include "llvm/CodeGen/TargetLowering.h"
22 #include "llvm/Support/MathExtras.h"
23 #include "llvm/Target/TargetMachine.h"
24 
25 #define DEBUG_TYPE "gi-combiner"
26 
27 using namespace llvm;
28 using namespace MIPatternMatch;
29 
30 // Option to allow testing of the combiner while no targets know about indexed
31 // addressing.
32 static cl::opt<bool>
33     ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
34                        cl::desc("Force all indexed operations to be "
35                                 "legal for the GlobalISel combiner"));
36 
37 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
38                                MachineIRBuilder &B, GISelKnownBits *KB,
39                                MachineDominatorTree *MDT,
40                                const LegalizerInfo *LI)
41     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
42       KB(KB), MDT(MDT), LI(LI) {
43   (void)this->KB;
44 }
45 
46 bool CombinerHelper::isLegalOrBeforeLegalizer(
47     const LegalityQuery &Query) const {
48   return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
49 }
50 
51 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
52                                     Register ToReg) const {
53   Observer.changingAllUsesOfReg(MRI, FromReg);
54 
55   if (MRI.constrainRegAttrs(ToReg, FromReg))
56     MRI.replaceRegWith(FromReg, ToReg);
57   else
58     Builder.buildCopy(ToReg, FromReg);
59 
60   Observer.finishedChangingAllUsesOfReg();
61 }
62 
63 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
64                                       MachineOperand &FromRegOp,
65                                       Register ToReg) const {
66   assert(FromRegOp.getParent() && "Expected an operand in an MI");
67   Observer.changingInstr(*FromRegOp.getParent());
68 
69   FromRegOp.setReg(ToReg);
70 
71   Observer.changedInstr(*FromRegOp.getParent());
72 }
73 
74 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
75   if (matchCombineCopy(MI)) {
76     applyCombineCopy(MI);
77     return true;
78   }
79   return false;
80 }
81 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
82   if (MI.getOpcode() != TargetOpcode::COPY)
83     return false;
84   Register DstReg = MI.getOperand(0).getReg();
85   Register SrcReg = MI.getOperand(1).getReg();
86   return canReplaceReg(DstReg, SrcReg, MRI);
87 }
88 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
89   Register DstReg = MI.getOperand(0).getReg();
90   Register SrcReg = MI.getOperand(1).getReg();
91   MI.eraseFromParent();
92   replaceRegWith(MRI, DstReg, SrcReg);
93 }
94 
95 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
96   bool IsUndef = false;
97   SmallVector<Register, 4> Ops;
98   if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
99     applyCombineConcatVectors(MI, IsUndef, Ops);
100     return true;
101   }
102   return false;
103 }
104 
105 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
106                                                SmallVectorImpl<Register> &Ops) {
107   assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
108          "Invalid instruction");
109   IsUndef = true;
110   MachineInstr *Undef = nullptr;
111 
112   // Walk over all the operands of concat vectors and check if they are
113   // build_vector themselves or undef.
114   // Then collect their operands in Ops.
115   for (const MachineOperand &MO : MI.uses()) {
116     Register Reg = MO.getReg();
117     MachineInstr *Def = MRI.getVRegDef(Reg);
118     assert(Def && "Operand not defined");
119     switch (Def->getOpcode()) {
120     case TargetOpcode::G_BUILD_VECTOR:
121       IsUndef = false;
122       // Remember the operands of the build_vector to fold
123       // them into the yet-to-build flattened concat vectors.
124       for (const MachineOperand &BuildVecMO : Def->uses())
125         Ops.push_back(BuildVecMO.getReg());
126       break;
127     case TargetOpcode::G_IMPLICIT_DEF: {
128       LLT OpType = MRI.getType(Reg);
129       // Keep one undef value for all the undef operands.
130       if (!Undef) {
131         Builder.setInsertPt(*MI.getParent(), MI);
132         Undef = Builder.buildUndef(OpType.getScalarType());
133       }
134       assert(MRI.getType(Undef->getOperand(0).getReg()) ==
135                  OpType.getScalarType() &&
136              "All undefs should have the same type");
137       // Break the undef vector in as many scalar elements as needed
138       // for the flattening.
139       for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
140            EltIdx != EltEnd; ++EltIdx)
141         Ops.push_back(Undef->getOperand(0).getReg());
142       break;
143     }
144     default:
145       return false;
146     }
147   }
148   return true;
149 }
150 void CombinerHelper::applyCombineConcatVectors(
151     MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
152   // We determined that the concat_vectors can be flatten.
153   // Generate the flattened build_vector.
154   Register DstReg = MI.getOperand(0).getReg();
155   Builder.setInsertPt(*MI.getParent(), MI);
156   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
157 
158   // Note: IsUndef is sort of redundant. We could have determine it by
159   // checking that at all Ops are undef.  Alternatively, we could have
160   // generate a build_vector of undefs and rely on another combine to
161   // clean that up.  For now, given we already gather this information
162   // in tryCombineConcatVectors, just save compile time and issue the
163   // right thing.
164   if (IsUndef)
165     Builder.buildUndef(NewDstReg);
166   else
167     Builder.buildBuildVector(NewDstReg, Ops);
168   MI.eraseFromParent();
169   replaceRegWith(MRI, DstReg, NewDstReg);
170 }
171 
172 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
173   SmallVector<Register, 4> Ops;
174   if (matchCombineShuffleVector(MI, Ops)) {
175     applyCombineShuffleVector(MI, Ops);
176     return true;
177   }
178   return false;
179 }
180 
181 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
182                                                SmallVectorImpl<Register> &Ops) {
183   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
184          "Invalid instruction kind");
185   LLT DstType = MRI.getType(MI.getOperand(0).getReg());
186   Register Src1 = MI.getOperand(1).getReg();
187   LLT SrcType = MRI.getType(Src1);
188   // As bizarre as it may look, shuffle vector can actually produce
189   // scalar! This is because at the IR level a <1 x ty> shuffle
190   // vector is perfectly valid.
191   unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
192   unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
193 
194   // If the resulting vector is smaller than the size of the source
195   // vectors being concatenated, we won't be able to replace the
196   // shuffle vector into a concat_vectors.
197   //
198   // Note: We may still be able to produce a concat_vectors fed by
199   //       extract_vector_elt and so on. It is less clear that would
200   //       be better though, so don't bother for now.
201   //
202   // If the destination is a scalar, the size of the sources doesn't
203   // matter. we will lower the shuffle to a plain copy. This will
204   // work only if the source and destination have the same size. But
205   // that's covered by the next condition.
206   //
207   // TODO: If the size between the source and destination don't match
208   //       we could still emit an extract vector element in that case.
209   if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
210     return false;
211 
212   // Check that the shuffle mask can be broken evenly between the
213   // different sources.
214   if (DstNumElts % SrcNumElts != 0)
215     return false;
216 
217   // Mask length is a multiple of the source vector length.
218   // Check if the shuffle is some kind of concatenation of the input
219   // vectors.
220   unsigned NumConcat = DstNumElts / SrcNumElts;
221   SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
222   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
223   for (unsigned i = 0; i != DstNumElts; ++i) {
224     int Idx = Mask[i];
225     // Undef value.
226     if (Idx < 0)
227       continue;
228     // Ensure the indices in each SrcType sized piece are sequential and that
229     // the same source is used for the whole piece.
230     if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
231         (ConcatSrcs[i / SrcNumElts] >= 0 &&
232          ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
233       return false;
234     // Remember which source this index came from.
235     ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
236   }
237 
238   // The shuffle is concatenating multiple vectors together.
239   // Collect the different operands for that.
240   Register UndefReg;
241   Register Src2 = MI.getOperand(2).getReg();
242   for (auto Src : ConcatSrcs) {
243     if (Src < 0) {
244       if (!UndefReg) {
245         Builder.setInsertPt(*MI.getParent(), MI);
246         UndefReg = Builder.buildUndef(SrcType).getReg(0);
247       }
248       Ops.push_back(UndefReg);
249     } else if (Src == 0)
250       Ops.push_back(Src1);
251     else
252       Ops.push_back(Src2);
253   }
254   return true;
255 }
256 
257 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
258                                                const ArrayRef<Register> Ops) {
259   Register DstReg = MI.getOperand(0).getReg();
260   Builder.setInsertPt(*MI.getParent(), MI);
261   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
262 
263   if (Ops.size() == 1)
264     Builder.buildCopy(NewDstReg, Ops[0]);
265   else
266     Builder.buildMerge(NewDstReg, Ops);
267 
268   MI.eraseFromParent();
269   replaceRegWith(MRI, DstReg, NewDstReg);
270 }
271 
272 namespace {
273 
274 /// Select a preference between two uses. CurrentUse is the current preference
275 /// while *ForCandidate is attributes of the candidate under consideration.
276 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
277                                   const LLT TyForCandidate,
278                                   unsigned OpcodeForCandidate,
279                                   MachineInstr *MIForCandidate) {
280   if (!CurrentUse.Ty.isValid()) {
281     if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
282         CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
283       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
284     return CurrentUse;
285   }
286 
287   // We permit the extend to hoist through basic blocks but this is only
288   // sensible if the target has extending loads. If you end up lowering back
289   // into a load and extend during the legalizer then the end result is
290   // hoisting the extend up to the load.
291 
292   // Prefer defined extensions to undefined extensions as these are more
293   // likely to reduce the number of instructions.
294   if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
295       CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
296     return CurrentUse;
297   else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
298            OpcodeForCandidate != TargetOpcode::G_ANYEXT)
299     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
300 
301   // Prefer sign extensions to zero extensions as sign-extensions tend to be
302   // more expensive.
303   if (CurrentUse.Ty == TyForCandidate) {
304     if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
305         OpcodeForCandidate == TargetOpcode::G_ZEXT)
306       return CurrentUse;
307     else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
308              OpcodeForCandidate == TargetOpcode::G_SEXT)
309       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
310   }
311 
312   // This is potentially target specific. We've chosen the largest type
313   // because G_TRUNC is usually free. One potential catch with this is that
314   // some targets have a reduced number of larger registers than smaller
315   // registers and this choice potentially increases the live-range for the
316   // larger value.
317   if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
318     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
319   }
320   return CurrentUse;
321 }
322 
323 /// Find a suitable place to insert some instructions and insert them. This
324 /// function accounts for special cases like inserting before a PHI node.
325 /// The current strategy for inserting before PHI's is to duplicate the
326 /// instructions for each predecessor. However, while that's ok for G_TRUNC
327 /// on most targets since it generally requires no code, other targets/cases may
328 /// want to try harder to find a dominating block.
329 static void InsertInsnsWithoutSideEffectsBeforeUse(
330     MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
331     std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
332                        MachineOperand &UseMO)>
333         Inserter) {
334   MachineInstr &UseMI = *UseMO.getParent();
335 
336   MachineBasicBlock *InsertBB = UseMI.getParent();
337 
338   // If the use is a PHI then we want the predecessor block instead.
339   if (UseMI.isPHI()) {
340     MachineOperand *PredBB = std::next(&UseMO);
341     InsertBB = PredBB->getMBB();
342   }
343 
344   // If the block is the same block as the def then we want to insert just after
345   // the def instead of at the start of the block.
346   if (InsertBB == DefMI.getParent()) {
347     MachineBasicBlock::iterator InsertPt = &DefMI;
348     Inserter(InsertBB, std::next(InsertPt), UseMO);
349     return;
350   }
351 
352   // Otherwise we want the start of the BB
353   Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
354 }
355 } // end anonymous namespace
356 
357 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
358   PreferredTuple Preferred;
359   if (matchCombineExtendingLoads(MI, Preferred)) {
360     applyCombineExtendingLoads(MI, Preferred);
361     return true;
362   }
363   return false;
364 }
365 
366 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
367                                                 PreferredTuple &Preferred) {
368   // We match the loads and follow the uses to the extend instead of matching
369   // the extends and following the def to the load. This is because the load
370   // must remain in the same position for correctness (unless we also add code
371   // to find a safe place to sink it) whereas the extend is freely movable.
372   // It also prevents us from duplicating the load for the volatile case or just
373   // for performance.
374 
375   if (MI.getOpcode() != TargetOpcode::G_LOAD &&
376       MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
377       MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
378     return false;
379 
380   auto &LoadValue = MI.getOperand(0);
381   assert(LoadValue.isReg() && "Result wasn't a register?");
382 
383   LLT LoadValueTy = MRI.getType(LoadValue.getReg());
384   if (!LoadValueTy.isScalar())
385     return false;
386 
387   // Most architectures are going to legalize <s8 loads into at least a 1 byte
388   // load, and the MMOs can only describe memory accesses in multiples of bytes.
389   // If we try to perform extload combining on those, we can end up with
390   // %a(s8) = extload %ptr (load 1 byte from %ptr)
391   // ... which is an illegal extload instruction.
392   if (LoadValueTy.getSizeInBits() < 8)
393     return false;
394 
395   // For non power-of-2 types, they will very likely be legalized into multiple
396   // loads. Don't bother trying to match them into extending loads.
397   if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
398     return false;
399 
400   // Find the preferred type aside from the any-extends (unless it's the only
401   // one) and non-extending ops. We'll emit an extending load to that type and
402   // and emit a variant of (extend (trunc X)) for the others according to the
403   // relative type sizes. At the same time, pick an extend to use based on the
404   // extend involved in the chosen type.
405   unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
406                                  ? TargetOpcode::G_ANYEXT
407                                  : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
408                                        ? TargetOpcode::G_SEXT
409                                        : TargetOpcode::G_ZEXT;
410   Preferred = {LLT(), PreferredOpcode, nullptr};
411   for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
412     if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
413         UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
414         (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
415       // Check for legality.
416       if (LI) {
417         LegalityQuery::MemDesc MMDesc;
418         const auto &MMO = **MI.memoperands_begin();
419         MMDesc.SizeInBits = MMO.getSizeInBits();
420         MMDesc.AlignInBits = MMO.getAlign().value() * 8;
421         MMDesc.Ordering = MMO.getOrdering();
422         LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
423         LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
424         if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
425             LegalizeActions::Legal)
426           continue;
427       }
428       Preferred = ChoosePreferredUse(Preferred,
429                                      MRI.getType(UseMI.getOperand(0).getReg()),
430                                      UseMI.getOpcode(), &UseMI);
431     }
432   }
433 
434   // There were no extends
435   if (!Preferred.MI)
436     return false;
437   // It should be impossible to chose an extend without selecting a different
438   // type since by definition the result of an extend is larger.
439   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
440 
441   LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
442   return true;
443 }
444 
445 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
446                                                 PreferredTuple &Preferred) {
447   // Rewrite the load to the chosen extending load.
448   Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
449 
450   // Inserter to insert a truncate back to the original type at a given point
451   // with some basic CSE to limit truncate duplication to one per BB.
452   DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
453   auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
454                            MachineBasicBlock::iterator InsertBefore,
455                            MachineOperand &UseMO) {
456     MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
457     if (PreviouslyEmitted) {
458       Observer.changingInstr(*UseMO.getParent());
459       UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
460       Observer.changedInstr(*UseMO.getParent());
461       return;
462     }
463 
464     Builder.setInsertPt(*InsertIntoBB, InsertBefore);
465     Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
466     MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
467     EmittedInsns[InsertIntoBB] = NewMI;
468     replaceRegOpWith(MRI, UseMO, NewDstReg);
469   };
470 
471   Observer.changingInstr(MI);
472   MI.setDesc(
473       Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
474                                ? TargetOpcode::G_SEXTLOAD
475                                : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
476                                      ? TargetOpcode::G_ZEXTLOAD
477                                      : TargetOpcode::G_LOAD));
478 
479   // Rewrite all the uses to fix up the types.
480   auto &LoadValue = MI.getOperand(0);
481   SmallVector<MachineOperand *, 4> Uses;
482   for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
483     Uses.push_back(&UseMO);
484 
485   for (auto *UseMO : Uses) {
486     MachineInstr *UseMI = UseMO->getParent();
487 
488     // If the extend is compatible with the preferred extend then we should fix
489     // up the type and extend so that it uses the preferred use.
490     if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
491         UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
492       Register UseDstReg = UseMI->getOperand(0).getReg();
493       MachineOperand &UseSrcMO = UseMI->getOperand(1);
494       const LLT UseDstTy = MRI.getType(UseDstReg);
495       if (UseDstReg != ChosenDstReg) {
496         if (Preferred.Ty == UseDstTy) {
497           // If the use has the same type as the preferred use, then merge
498           // the vregs and erase the extend. For example:
499           //    %1:_(s8) = G_LOAD ...
500           //    %2:_(s32) = G_SEXT %1(s8)
501           //    %3:_(s32) = G_ANYEXT %1(s8)
502           //    ... = ... %3(s32)
503           // rewrites to:
504           //    %2:_(s32) = G_SEXTLOAD ...
505           //    ... = ... %2(s32)
506           replaceRegWith(MRI, UseDstReg, ChosenDstReg);
507           Observer.erasingInstr(*UseMO->getParent());
508           UseMO->getParent()->eraseFromParent();
509         } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
510           // If the preferred size is smaller, then keep the extend but extend
511           // from the result of the extending load. For example:
512           //    %1:_(s8) = G_LOAD ...
513           //    %2:_(s32) = G_SEXT %1(s8)
514           //    %3:_(s64) = G_ANYEXT %1(s8)
515           //    ... = ... %3(s64)
516           /// rewrites to:
517           //    %2:_(s32) = G_SEXTLOAD ...
518           //    %3:_(s64) = G_ANYEXT %2:_(s32)
519           //    ... = ... %3(s64)
520           replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
521         } else {
522           // If the preferred size is large, then insert a truncate. For
523           // example:
524           //    %1:_(s8) = G_LOAD ...
525           //    %2:_(s64) = G_SEXT %1(s8)
526           //    %3:_(s32) = G_ZEXT %1(s8)
527           //    ... = ... %3(s32)
528           /// rewrites to:
529           //    %2:_(s64) = G_SEXTLOAD ...
530           //    %4:_(s8) = G_TRUNC %2:_(s32)
531           //    %3:_(s64) = G_ZEXT %2:_(s8)
532           //    ... = ... %3(s64)
533           InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
534                                                  InsertTruncAt);
535         }
536         continue;
537       }
538       // The use is (one of) the uses of the preferred use we chose earlier.
539       // We're going to update the load to def this value later so just erase
540       // the old extend.
541       Observer.erasingInstr(*UseMO->getParent());
542       UseMO->getParent()->eraseFromParent();
543       continue;
544     }
545 
546     // The use isn't an extend. Truncate back to the type we originally loaded.
547     // This is free on many targets.
548     InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
549   }
550 
551   MI.getOperand(0).setReg(ChosenDstReg);
552   Observer.changedInstr(MI);
553 }
554 
555 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
556                                    const MachineInstr &UseMI) {
557   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
558          "shouldn't consider debug uses");
559   assert(DefMI.getParent() == UseMI.getParent());
560   if (&DefMI == &UseMI)
561     return false;
562 
563   // Loop through the basic block until we find one of the instructions.
564   MachineBasicBlock::const_iterator I = DefMI.getParent()->begin();
565   for (; &*I != &DefMI && &*I != &UseMI; ++I)
566     return &*I == &DefMI;
567 
568   llvm_unreachable("Block must contain instructions");
569 }
570 
571 bool CombinerHelper::dominates(const MachineInstr &DefMI,
572                                const MachineInstr &UseMI) {
573   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
574          "shouldn't consider debug uses");
575   if (MDT)
576     return MDT->dominates(&DefMI, &UseMI);
577   else if (DefMI.getParent() != UseMI.getParent())
578     return false;
579 
580   return isPredecessor(DefMI, UseMI);
581 }
582 
583 bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
584   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
585   Register SrcReg = MI.getOperand(1).getReg();
586   Register LoadUser = SrcReg;
587 
588   if (MRI.getType(SrcReg).isVector())
589     return false;
590 
591   Register TruncSrc;
592   if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
593     LoadUser = TruncSrc;
594 
595   uint64_t SizeInBits = MI.getOperand(2).getImm();
596   // If the source is a G_SEXTLOAD from the same bit width, then we don't
597   // need any extend at all, just a truncate.
598   if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
599     const auto &MMO = **LoadMI->memoperands_begin();
600     // If truncating more than the original extended value, abort.
601     if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
602       return false;
603     if (MMO.getSizeInBits() == SizeInBits)
604       return true;
605   }
606   return false;
607 }
608 
609 bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
610   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
611   Builder.setInstrAndDebugLoc(MI);
612   Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
613   MI.eraseFromParent();
614   return true;
615 }
616 
617 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
618                                             Register &Base, Register &Offset) {
619   auto &MF = *MI.getParent()->getParent();
620   const auto &TLI = *MF.getSubtarget().getTargetLowering();
621 
622 #ifndef NDEBUG
623   unsigned Opcode = MI.getOpcode();
624   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
625          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
626 #endif
627 
628   Base = MI.getOperand(1).getReg();
629   MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
630   if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
631     return false;
632 
633   LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
634 
635   for (auto &Use : MRI.use_nodbg_instructions(Base)) {
636     if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
637       continue;
638 
639     Offset = Use.getOperand(2).getReg();
640     if (!ForceLegalIndexing &&
641         !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
642       LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
643                         << Use);
644       continue;
645     }
646 
647     // Make sure the offset calculation is before the potentially indexed op.
648     // FIXME: we really care about dependency here. The offset calculation might
649     // be movable.
650     MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
651     if (!OffsetDef || !dominates(*OffsetDef, MI)) {
652       LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
653                         << Use);
654       continue;
655     }
656 
657     // FIXME: check whether all uses of Base are load/store with foldable
658     // addressing modes. If so, using the normal addr-modes is better than
659     // forming an indexed one.
660 
661     bool MemOpDominatesAddrUses = true;
662     for (auto &PtrAddUse :
663          MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
664       if (!dominates(MI, PtrAddUse)) {
665         MemOpDominatesAddrUses = false;
666         break;
667       }
668     }
669 
670     if (!MemOpDominatesAddrUses) {
671       LLVM_DEBUG(
672           dbgs() << "    Ignoring candidate as memop does not dominate uses: "
673                  << Use);
674       continue;
675     }
676 
677     LLVM_DEBUG(dbgs() << "    Found match: " << Use);
678     Addr = Use.getOperand(0).getReg();
679     return true;
680   }
681 
682   return false;
683 }
684 
685 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
686                                            Register &Base, Register &Offset) {
687   auto &MF = *MI.getParent()->getParent();
688   const auto &TLI = *MF.getSubtarget().getTargetLowering();
689 
690 #ifndef NDEBUG
691   unsigned Opcode = MI.getOpcode();
692   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
693          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
694 #endif
695 
696   Addr = MI.getOperand(1).getReg();
697   MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
698   if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
699     return false;
700 
701   Base = AddrDef->getOperand(1).getReg();
702   Offset = AddrDef->getOperand(2).getReg();
703 
704   LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
705 
706   if (!ForceLegalIndexing &&
707       !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
708     LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
709     return false;
710   }
711 
712   MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
713   if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
714     LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
715     return false;
716   }
717 
718   if (MI.getOpcode() == TargetOpcode::G_STORE) {
719     // Would require a copy.
720     if (Base == MI.getOperand(0).getReg()) {
721       LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
722       return false;
723     }
724 
725     // We're expecting one use of Addr in MI, but it could also be the
726     // value stored, which isn't actually dominated by the instruction.
727     if (MI.getOperand(0).getReg() == Addr) {
728       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
729       return false;
730     }
731   }
732 
733   // FIXME: check whether all uses of the base pointer are constant PtrAdds.
734   // That might allow us to end base's liveness here by adjusting the constant.
735 
736   for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
737     if (!dominates(MI, UseMI)) {
738       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
739       return false;
740     }
741   }
742 
743   return true;
744 }
745 
746 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
747   IndexedLoadStoreMatchInfo MatchInfo;
748   if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
749     applyCombineIndexedLoadStore(MI, MatchInfo);
750     return true;
751   }
752   return false;
753 }
754 
755 bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
756   unsigned Opcode = MI.getOpcode();
757   if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
758       Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
759     return false;
760 
761   MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
762                                           MatchInfo.Offset);
763   if (!MatchInfo.IsPre &&
764       !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
765                               MatchInfo.Offset))
766     return false;
767 
768   return true;
769 }
770 
771 void CombinerHelper::applyCombineIndexedLoadStore(
772     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
773   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
774   MachineIRBuilder MIRBuilder(MI);
775   unsigned Opcode = MI.getOpcode();
776   bool IsStore = Opcode == TargetOpcode::G_STORE;
777   unsigned NewOpcode;
778   switch (Opcode) {
779   case TargetOpcode::G_LOAD:
780     NewOpcode = TargetOpcode::G_INDEXED_LOAD;
781     break;
782   case TargetOpcode::G_SEXTLOAD:
783     NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
784     break;
785   case TargetOpcode::G_ZEXTLOAD:
786     NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
787     break;
788   case TargetOpcode::G_STORE:
789     NewOpcode = TargetOpcode::G_INDEXED_STORE;
790     break;
791   default:
792     llvm_unreachable("Unknown load/store opcode");
793   }
794 
795   auto MIB = MIRBuilder.buildInstr(NewOpcode);
796   if (IsStore) {
797     MIB.addDef(MatchInfo.Addr);
798     MIB.addUse(MI.getOperand(0).getReg());
799   } else {
800     MIB.addDef(MI.getOperand(0).getReg());
801     MIB.addDef(MatchInfo.Addr);
802   }
803 
804   MIB.addUse(MatchInfo.Base);
805   MIB.addUse(MatchInfo.Offset);
806   MIB.addImm(MatchInfo.IsPre);
807   MI.eraseFromParent();
808   AddrDef.eraseFromParent();
809 
810   LLVM_DEBUG(dbgs() << "    Combinined to indexed operation");
811 }
812 
813 bool CombinerHelper::matchElideBrByInvertingCond(MachineInstr &MI) {
814   if (MI.getOpcode() != TargetOpcode::G_BR)
815     return false;
816 
817   // Try to match the following:
818   // bb1:
819   //   %c(s32) = G_ICMP pred, %a, %b
820   //   %c1(s1) = G_TRUNC %c(s32)
821   //   G_BRCOND %c1, %bb2
822   //   G_BR %bb3
823   // bb2:
824   // ...
825   // bb3:
826 
827   // The above pattern does not have a fall through to the successor bb2, always
828   // resulting in a branch no matter which path is taken. Here we try to find
829   // and replace that pattern with conditional branch to bb3 and otherwise
830   // fallthrough to bb2.
831 
832   MachineBasicBlock *MBB = MI.getParent();
833   MachineBasicBlock::iterator BrIt(MI);
834   if (BrIt == MBB->begin())
835     return false;
836   assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
837 
838   MachineInstr *BrCond = &*std::prev(BrIt);
839   if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
840     return false;
841 
842   // Check that the next block is the conditional branch target.
843   if (!MBB->isLayoutSuccessor(BrCond->getOperand(1).getMBB()))
844     return false;
845 
846   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
847   if (!CmpMI || CmpMI->getOpcode() != TargetOpcode::G_ICMP ||
848       !MRI.hasOneNonDBGUse(CmpMI->getOperand(0).getReg()))
849     return false;
850   return true;
851 }
852 
853 bool CombinerHelper::tryElideBrByInvertingCond(MachineInstr &MI) {
854   if (!matchElideBrByInvertingCond(MI))
855     return false;
856   applyElideBrByInvertingCond(MI);
857   return true;
858 }
859 
860 void CombinerHelper::applyElideBrByInvertingCond(MachineInstr &MI) {
861   MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
862   MachineBasicBlock::iterator BrIt(MI);
863   MachineInstr *BrCond = &*std::prev(BrIt);
864   MachineInstr *CmpMI = MRI.getVRegDef(BrCond->getOperand(0).getReg());
865 
866   CmpInst::Predicate InversePred = CmpInst::getInversePredicate(
867       (CmpInst::Predicate)CmpMI->getOperand(1).getPredicate());
868 
869   // Invert the G_ICMP condition.
870   Observer.changingInstr(*CmpMI);
871   CmpMI->getOperand(1).setPredicate(InversePred);
872   Observer.changedInstr(*CmpMI);
873 
874   // Change the conditional branch target.
875   Observer.changingInstr(*BrCond);
876   BrCond->getOperand(1).setMBB(BrTarget);
877   Observer.changedInstr(*BrCond);
878   MI.eraseFromParent();
879 }
880 
881 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
882   // On Darwin, -Os means optimize for size without hurting performance, so
883   // only really optimize for size when -Oz (MinSize) is used.
884   if (MF.getTarget().getTargetTriple().isOSDarwin())
885     return MF.getFunction().hasMinSize();
886   return MF.getFunction().hasOptSize();
887 }
888 
889 // Returns a list of types to use for memory op lowering in MemOps. A partial
890 // port of findOptimalMemOpLowering in TargetLowering.
891 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
892                                           unsigned Limit, const MemOp &Op,
893                                           unsigned DstAS, unsigned SrcAS,
894                                           const AttributeList &FuncAttributes,
895                                           const TargetLowering &TLI) {
896   if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
897     return false;
898 
899   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
900 
901   if (Ty == LLT()) {
902     // Use the largest scalar type whose alignment constraints are satisfied.
903     // We only need to check DstAlign here as SrcAlign is always greater or
904     // equal to DstAlign (or zero).
905     Ty = LLT::scalar(64);
906     if (Op.isFixedDstAlign())
907       while (Op.getDstAlign() < Ty.getSizeInBytes() &&
908              !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
909         Ty = LLT::scalar(Ty.getSizeInBytes());
910     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
911     // FIXME: check for the largest legal type we can load/store to.
912   }
913 
914   unsigned NumMemOps = 0;
915   uint64_t Size = Op.size();
916   while (Size) {
917     unsigned TySize = Ty.getSizeInBytes();
918     while (TySize > Size) {
919       // For now, only use non-vector load / store's for the left-over pieces.
920       LLT NewTy = Ty;
921       // FIXME: check for mem op safety and legality of the types. Not all of
922       // SDAGisms map cleanly to GISel concepts.
923       if (NewTy.isVector())
924         NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
925       NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
926       unsigned NewTySize = NewTy.getSizeInBytes();
927       assert(NewTySize > 0 && "Could not find appropriate type");
928 
929       // If the new LLT cannot cover all of the remaining bits, then consider
930       // issuing a (or a pair of) unaligned and overlapping load / store.
931       bool Fast;
932       // Need to get a VT equivalent for allowMisalignedMemoryAccesses().
933       MVT VT = getMVTForLLT(Ty);
934       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
935           TLI.allowsMisalignedMemoryAccesses(
936               VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign().value() : 0,
937               MachineMemOperand::MONone, &Fast) &&
938           Fast)
939         TySize = Size;
940       else {
941         Ty = NewTy;
942         TySize = NewTySize;
943       }
944     }
945 
946     if (++NumMemOps > Limit)
947       return false;
948 
949     MemOps.push_back(Ty);
950     Size -= TySize;
951   }
952 
953   return true;
954 }
955 
956 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
957   if (Ty.isVector())
958     return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
959                                 Ty.getNumElements());
960   return IntegerType::get(C, Ty.getSizeInBits());
961 }
962 
963 // Get a vectorized representation of the memset value operand, GISel edition.
964 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
965   MachineRegisterInfo &MRI = *MIB.getMRI();
966   unsigned NumBits = Ty.getScalarSizeInBits();
967   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
968   if (!Ty.isVector() && ValVRegAndVal) {
969     unsigned KnownVal = ValVRegAndVal->Value;
970     APInt Scalar = APInt(8, KnownVal);
971     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
972     return MIB.buildConstant(Ty, SplatVal).getReg(0);
973   }
974 
975   // Extend the byte value to the larger type, and then multiply by a magic
976   // value 0x010101... in order to replicate it across every byte.
977   // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
978   if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
979     return MIB.buildConstant(Ty, 0).getReg(0);
980   }
981 
982   LLT ExtType = Ty.getScalarType();
983   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
984   if (NumBits > 8) {
985     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
986     auto MagicMI = MIB.buildConstant(ExtType, Magic);
987     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
988   }
989 
990   // For vector types create a G_BUILD_VECTOR.
991   if (Ty.isVector())
992     Val = MIB.buildSplatVector(Ty, Val).getReg(0);
993 
994   return Val;
995 }
996 
997 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
998                                     Register Val, unsigned KnownLen,
999                                     Align Alignment, bool IsVolatile) {
1000   auto &MF = *MI.getParent()->getParent();
1001   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1002   auto &DL = MF.getDataLayout();
1003   LLVMContext &C = MF.getFunction().getContext();
1004 
1005   assert(KnownLen != 0 && "Have a zero length memset length!");
1006 
1007   bool DstAlignCanChange = false;
1008   MachineFrameInfo &MFI = MF.getFrameInfo();
1009   bool OptSize = shouldLowerMemFuncForSize(MF);
1010 
1011   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1012   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1013     DstAlignCanChange = true;
1014 
1015   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
1016   std::vector<LLT> MemOps;
1017 
1018   const auto &DstMMO = **MI.memoperands_begin();
1019   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1020 
1021   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1022   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1023 
1024   if (!findGISelOptimalMemOpLowering(MemOps, Limit,
1025                                      MemOp::Set(KnownLen, DstAlignCanChange,
1026                                                 Alignment,
1027                                                 /*IsZeroMemset=*/IsZeroVal,
1028                                                 /*IsVolatile=*/IsVolatile),
1029                                      DstPtrInfo.getAddrSpace(), ~0u,
1030                                      MF.getFunction().getAttributes(), TLI))
1031     return false;
1032 
1033   if (DstAlignCanChange) {
1034     // Get an estimate of the type from the LLT.
1035     Type *IRTy = getTypeForLLT(MemOps[0], C);
1036     Align NewAlign = DL.getABITypeAlign(IRTy);
1037     if (NewAlign > Alignment) {
1038       Alignment = NewAlign;
1039       unsigned FI = FIDef->getOperand(1).getIndex();
1040       // Give the stack frame object a larger alignment if needed.
1041       if (MFI.getObjectAlign(FI) < Alignment)
1042         MFI.setObjectAlignment(FI, Alignment);
1043     }
1044   }
1045 
1046   MachineIRBuilder MIB(MI);
1047   // Find the largest store and generate the bit pattern for it.
1048   LLT LargestTy = MemOps[0];
1049   for (unsigned i = 1; i < MemOps.size(); i++)
1050     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1051       LargestTy = MemOps[i];
1052 
1053   // The memset stored value is always defined as an s8, so in order to make it
1054   // work with larger store types we need to repeat the bit pattern across the
1055   // wider type.
1056   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
1057 
1058   if (!MemSetValue)
1059     return false;
1060 
1061   // Generate the stores. For each store type in the list, we generate the
1062   // matching store of that type to the destination address.
1063   LLT PtrTy = MRI.getType(Dst);
1064   unsigned DstOff = 0;
1065   unsigned Size = KnownLen;
1066   for (unsigned I = 0; I < MemOps.size(); I++) {
1067     LLT Ty = MemOps[I];
1068     unsigned TySize = Ty.getSizeInBytes();
1069     if (TySize > Size) {
1070       // Issuing an unaligned load / store pair that overlaps with the previous
1071       // pair. Adjust the offset accordingly.
1072       assert(I == MemOps.size() - 1 && I != 0);
1073       DstOff -= TySize - Size;
1074     }
1075 
1076     // If this store is smaller than the largest store see whether we can get
1077     // the smaller value for free with a truncate.
1078     Register Value = MemSetValue;
1079     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1080       MVT VT = getMVTForLLT(Ty);
1081       MVT LargestVT = getMVTForLLT(LargestTy);
1082       if (!LargestTy.isVector() && !Ty.isVector() &&
1083           TLI.isTruncateFree(LargestVT, VT))
1084         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1085       else
1086         Value = getMemsetValue(Val, Ty, MIB);
1087       if (!Value)
1088         return false;
1089     }
1090 
1091     auto *StoreMMO =
1092         MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
1093 
1094     Register Ptr = Dst;
1095     if (DstOff != 0) {
1096       auto Offset =
1097           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1098       Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1099     }
1100 
1101     MIB.buildStore(Value, Ptr, *StoreMMO);
1102     DstOff += Ty.getSizeInBytes();
1103     Size -= TySize;
1104   }
1105 
1106   MI.eraseFromParent();
1107   return true;
1108 }
1109 
1110 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1111                                     Register Src, unsigned KnownLen,
1112                                     Align DstAlign, Align SrcAlign,
1113                                     bool IsVolatile) {
1114   auto &MF = *MI.getParent()->getParent();
1115   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1116   auto &DL = MF.getDataLayout();
1117   LLVMContext &C = MF.getFunction().getContext();
1118 
1119   assert(KnownLen != 0 && "Have a zero length memcpy length!");
1120 
1121   bool DstAlignCanChange = false;
1122   MachineFrameInfo &MFI = MF.getFrameInfo();
1123   bool OptSize = shouldLowerMemFuncForSize(MF);
1124   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1125 
1126   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1127   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1128     DstAlignCanChange = true;
1129 
1130   // FIXME: infer better src pointer alignment like SelectionDAG does here.
1131   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1132   // if the memcpy is in a tail call position.
1133 
1134   unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1135   std::vector<LLT> MemOps;
1136 
1137   const auto &DstMMO = **MI.memoperands_begin();
1138   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1139   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1140   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1141 
1142   if (!findGISelOptimalMemOpLowering(
1143           MemOps, Limit,
1144           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1145                       IsVolatile),
1146           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1147           MF.getFunction().getAttributes(), TLI))
1148     return false;
1149 
1150   if (DstAlignCanChange) {
1151     // Get an estimate of the type from the LLT.
1152     Type *IRTy = getTypeForLLT(MemOps[0], C);
1153     Align NewAlign = DL.getABITypeAlign(IRTy);
1154 
1155     // Don't promote to an alignment that would require dynamic stack
1156     // realignment.
1157     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1158     if (!TRI->needsStackRealignment(MF))
1159       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1160         NewAlign = NewAlign / 2;
1161 
1162     if (NewAlign > Alignment) {
1163       Alignment = NewAlign;
1164       unsigned FI = FIDef->getOperand(1).getIndex();
1165       // Give the stack frame object a larger alignment if needed.
1166       if (MFI.getObjectAlign(FI) < Alignment)
1167         MFI.setObjectAlignment(FI, Alignment);
1168     }
1169   }
1170 
1171   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1172 
1173   MachineIRBuilder MIB(MI);
1174   // Now we need to emit a pair of load and stores for each of the types we've
1175   // collected. I.e. for each type, generate a load from the source pointer of
1176   // that type width, and then generate a corresponding store to the dest buffer
1177   // of that value loaded. This can result in a sequence of loads and stores
1178   // mixed types, depending on what the target specifies as good types to use.
1179   unsigned CurrOffset = 0;
1180   LLT PtrTy = MRI.getType(Src);
1181   unsigned Size = KnownLen;
1182   for (auto CopyTy : MemOps) {
1183     // Issuing an unaligned load / store pair  that overlaps with the previous
1184     // pair. Adjust the offset accordingly.
1185     if (CopyTy.getSizeInBytes() > Size)
1186       CurrOffset -= CopyTy.getSizeInBytes() - Size;
1187 
1188     // Construct MMOs for the accesses.
1189     auto *LoadMMO =
1190         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1191     auto *StoreMMO =
1192         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1193 
1194     // Create the load.
1195     Register LoadPtr = Src;
1196     Register Offset;
1197     if (CurrOffset != 0) {
1198       Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1199                    .getReg(0);
1200       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1201     }
1202     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1203 
1204     // Create the store.
1205     Register StorePtr =
1206         CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1207     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1208     CurrOffset += CopyTy.getSizeInBytes();
1209     Size -= CopyTy.getSizeInBytes();
1210   }
1211 
1212   MI.eraseFromParent();
1213   return true;
1214 }
1215 
1216 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1217                                      Register Src, unsigned KnownLen,
1218                                      Align DstAlign, Align SrcAlign,
1219                                      bool IsVolatile) {
1220   auto &MF = *MI.getParent()->getParent();
1221   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1222   auto &DL = MF.getDataLayout();
1223   LLVMContext &C = MF.getFunction().getContext();
1224 
1225   assert(KnownLen != 0 && "Have a zero length memmove length!");
1226 
1227   bool DstAlignCanChange = false;
1228   MachineFrameInfo &MFI = MF.getFrameInfo();
1229   bool OptSize = shouldLowerMemFuncForSize(MF);
1230   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1231 
1232   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1233   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1234     DstAlignCanChange = true;
1235 
1236   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1237   std::vector<LLT> MemOps;
1238 
1239   const auto &DstMMO = **MI.memoperands_begin();
1240   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1241   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1242   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1243 
1244   // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
1245   // to a bug in it's findOptimalMemOpLowering implementation. For now do the
1246   // same thing here.
1247   if (!findGISelOptimalMemOpLowering(
1248           MemOps, Limit,
1249           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1250                       /*IsVolatile*/ true),
1251           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1252           MF.getFunction().getAttributes(), TLI))
1253     return false;
1254 
1255   if (DstAlignCanChange) {
1256     // Get an estimate of the type from the LLT.
1257     Type *IRTy = getTypeForLLT(MemOps[0], C);
1258     Align NewAlign = DL.getABITypeAlign(IRTy);
1259 
1260     // Don't promote to an alignment that would require dynamic stack
1261     // realignment.
1262     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1263     if (!TRI->needsStackRealignment(MF))
1264       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1265         NewAlign = NewAlign / 2;
1266 
1267     if (NewAlign > Alignment) {
1268       Alignment = NewAlign;
1269       unsigned FI = FIDef->getOperand(1).getIndex();
1270       // Give the stack frame object a larger alignment if needed.
1271       if (MFI.getObjectAlign(FI) < Alignment)
1272         MFI.setObjectAlignment(FI, Alignment);
1273     }
1274   }
1275 
1276   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1277 
1278   MachineIRBuilder MIB(MI);
1279   // Memmove requires that we perform the loads first before issuing the stores.
1280   // Apart from that, this loop is pretty much doing the same thing as the
1281   // memcpy codegen function.
1282   unsigned CurrOffset = 0;
1283   LLT PtrTy = MRI.getType(Src);
1284   SmallVector<Register, 16> LoadVals;
1285   for (auto CopyTy : MemOps) {
1286     // Construct MMO for the load.
1287     auto *LoadMMO =
1288         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1289 
1290     // Create the load.
1291     Register LoadPtr = Src;
1292     if (CurrOffset != 0) {
1293       auto Offset =
1294           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1295       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1296     }
1297     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1298     CurrOffset += CopyTy.getSizeInBytes();
1299   }
1300 
1301   CurrOffset = 0;
1302   for (unsigned I = 0; I < MemOps.size(); ++I) {
1303     LLT CopyTy = MemOps[I];
1304     // Now store the values loaded.
1305     auto *StoreMMO =
1306         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1307 
1308     Register StorePtr = Dst;
1309     if (CurrOffset != 0) {
1310       auto Offset =
1311           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1312       StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1313     }
1314     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1315     CurrOffset += CopyTy.getSizeInBytes();
1316   }
1317   MI.eraseFromParent();
1318   return true;
1319 }
1320 
1321 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1322   // This combine is fairly complex so it's not written with a separate
1323   // matcher function.
1324   assert(MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS);
1325   Intrinsic::ID ID = (Intrinsic::ID)MI.getIntrinsicID();
1326   assert((ID == Intrinsic::memcpy || ID == Intrinsic::memmove ||
1327           ID == Intrinsic::memset) &&
1328          "Expected a memcpy like intrinsic");
1329 
1330   auto MMOIt = MI.memoperands_begin();
1331   const MachineMemOperand *MemOp = *MMOIt;
1332   bool IsVolatile = MemOp->isVolatile();
1333   // Don't try to optimize volatile.
1334   if (IsVolatile)
1335     return false;
1336 
1337   Align DstAlign = MemOp->getBaseAlign();
1338   Align SrcAlign;
1339   Register Dst = MI.getOperand(1).getReg();
1340   Register Src = MI.getOperand(2).getReg();
1341   Register Len = MI.getOperand(3).getReg();
1342 
1343   if (ID != Intrinsic::memset) {
1344     assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1345     MemOp = *(++MMOIt);
1346     SrcAlign = MemOp->getBaseAlign();
1347   }
1348 
1349   // See if this is a constant length copy
1350   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1351   if (!LenVRegAndVal)
1352     return false; // Leave it to the legalizer to lower it to a libcall.
1353   unsigned KnownLen = LenVRegAndVal->Value;
1354 
1355   if (KnownLen == 0) {
1356     MI.eraseFromParent();
1357     return true;
1358   }
1359 
1360   if (MaxLen && KnownLen > MaxLen)
1361     return false;
1362 
1363   if (ID == Intrinsic::memcpy)
1364     return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1365   if (ID == Intrinsic::memmove)
1366     return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign, IsVolatile);
1367   if (ID == Intrinsic::memset)
1368     return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1369   return false;
1370 }
1371 
1372 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1373                                            PtrAddChain &MatchInfo) {
1374   // We're trying to match the following pattern:
1375   //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1376   //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
1377   // -->
1378   //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1379 
1380   if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1381     return false;
1382 
1383   Register Add2 = MI.getOperand(1).getReg();
1384   Register Imm1 = MI.getOperand(2).getReg();
1385   auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1386   if (!MaybeImmVal)
1387     return false;
1388 
1389   MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
1390   if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1391     return false;
1392 
1393   Register Base = Add2Def->getOperand(1).getReg();
1394   Register Imm2 = Add2Def->getOperand(2).getReg();
1395   auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1396   if (!MaybeImm2Val)
1397     return false;
1398 
1399   // Pass the combined immediate to the apply function.
1400   MatchInfo.Imm = MaybeImmVal->Value + MaybeImm2Val->Value;
1401   MatchInfo.Base = Base;
1402   return true;
1403 }
1404 
1405 bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1406                                            PtrAddChain &MatchInfo) {
1407   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1408   MachineIRBuilder MIB(MI);
1409   LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1410   auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1411   Observer.changingInstr(MI);
1412   MI.getOperand(1).setReg(MatchInfo.Base);
1413   MI.getOperand(2).setReg(NewOffset.getReg(0));
1414   Observer.changedInstr(MI);
1415   return true;
1416 }
1417 
1418 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1419                                           unsigned &ShiftVal) {
1420   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1421   auto MaybeImmVal =
1422       getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1423   if (!MaybeImmVal || !isPowerOf2_64(MaybeImmVal->Value))
1424     return false;
1425   ShiftVal = Log2_64(MaybeImmVal->Value);
1426   return true;
1427 }
1428 
1429 bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1430                                           unsigned &ShiftVal) {
1431   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1432   MachineIRBuilder MIB(MI);
1433   LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1434   auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1435   Observer.changingInstr(MI);
1436   MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1437   MI.getOperand(2).setReg(ShiftCst.getReg(0));
1438   Observer.changedInstr(MI);
1439   return true;
1440 }
1441 
1442 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
1443                                                 unsigned TargetShiftSize,
1444                                                 unsigned &ShiftVal) {
1445   assert((MI.getOpcode() == TargetOpcode::G_SHL ||
1446           MI.getOpcode() == TargetOpcode::G_LSHR ||
1447           MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
1448 
1449   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
1450   if (Ty.isVector()) // TODO:
1451     return false;
1452 
1453   // Don't narrow further than the requested size.
1454   unsigned Size = Ty.getSizeInBits();
1455   if (Size <= TargetShiftSize)
1456     return false;
1457 
1458   auto MaybeImmVal =
1459     getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1460   if (!MaybeImmVal)
1461     return false;
1462 
1463   ShiftVal = MaybeImmVal->Value;
1464   return ShiftVal >= Size / 2 && ShiftVal < Size;
1465 }
1466 
1467 bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
1468                                                 const unsigned &ShiftVal) {
1469   Register DstReg = MI.getOperand(0).getReg();
1470   Register SrcReg = MI.getOperand(1).getReg();
1471   LLT Ty = MRI.getType(SrcReg);
1472   unsigned Size = Ty.getSizeInBits();
1473   unsigned HalfSize = Size / 2;
1474   assert(ShiftVal >= HalfSize);
1475 
1476   LLT HalfTy = LLT::scalar(HalfSize);
1477 
1478   Builder.setInstr(MI);
1479   auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
1480   unsigned NarrowShiftAmt = ShiftVal - HalfSize;
1481 
1482   if (MI.getOpcode() == TargetOpcode::G_LSHR) {
1483     Register Narrowed = Unmerge.getReg(1);
1484 
1485     //  dst = G_LSHR s64:x, C for C >= 32
1486     // =>
1487     //   lo, hi = G_UNMERGE_VALUES x
1488     //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
1489 
1490     if (NarrowShiftAmt != 0) {
1491       Narrowed = Builder.buildLShr(HalfTy, Narrowed,
1492         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
1493     }
1494 
1495     auto Zero = Builder.buildConstant(HalfTy, 0);
1496     Builder.buildMerge(DstReg, { Narrowed, Zero });
1497   } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
1498     Register Narrowed = Unmerge.getReg(0);
1499     //  dst = G_SHL s64:x, C for C >= 32
1500     // =>
1501     //   lo, hi = G_UNMERGE_VALUES x
1502     //   dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
1503     if (NarrowShiftAmt != 0) {
1504       Narrowed = Builder.buildShl(HalfTy, Narrowed,
1505         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
1506     }
1507 
1508     auto Zero = Builder.buildConstant(HalfTy, 0);
1509     Builder.buildMerge(DstReg, { Zero, Narrowed });
1510   } else {
1511     assert(MI.getOpcode() == TargetOpcode::G_ASHR);
1512     auto Hi = Builder.buildAShr(
1513       HalfTy, Unmerge.getReg(1),
1514       Builder.buildConstant(HalfTy, HalfSize - 1));
1515 
1516     if (ShiftVal == HalfSize) {
1517       // (G_ASHR i64:x, 32) ->
1518       //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
1519       Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
1520     } else if (ShiftVal == Size - 1) {
1521       // Don't need a second shift.
1522       // (G_ASHR i64:x, 63) ->
1523       //   %narrowed = (G_ASHR hi_32(x), 31)
1524       //   G_MERGE_VALUES %narrowed, %narrowed
1525       Builder.buildMerge(DstReg, { Hi, Hi });
1526     } else {
1527       auto Lo = Builder.buildAShr(
1528         HalfTy, Unmerge.getReg(1),
1529         Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
1530 
1531       // (G_ASHR i64:x, C) ->, for C >= 32
1532       //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
1533       Builder.buildMerge(DstReg, { Lo, Hi });
1534     }
1535   }
1536 
1537   MI.eraseFromParent();
1538   return true;
1539 }
1540 
1541 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
1542                                               unsigned TargetShiftAmount) {
1543   unsigned ShiftAmt;
1544   if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
1545     applyCombineShiftToUnmerge(MI, ShiftAmt);
1546     return true;
1547   }
1548 
1549   return false;
1550 }
1551 
1552 bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
1553   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
1554   Register DstReg = MI.getOperand(0).getReg();
1555   LLT DstTy = MRI.getType(DstReg);
1556   Register SrcReg = MI.getOperand(1).getReg();
1557   return mi_match(SrcReg, MRI,
1558                   m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
1559 }
1560 
1561 bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
1562   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
1563   Register DstReg = MI.getOperand(0).getReg();
1564   Builder.setInstr(MI);
1565   Builder.buildCopy(DstReg, Reg);
1566   MI.eraseFromParent();
1567   return true;
1568 }
1569 
1570 bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
1571   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
1572   Register SrcReg = MI.getOperand(1).getReg();
1573   return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
1574 }
1575 
1576 bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
1577   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
1578   Register DstReg = MI.getOperand(0).getReg();
1579   Builder.setInstr(MI);
1580   Builder.buildZExtOrTrunc(DstReg, Reg);
1581   MI.eraseFromParent();
1582   return true;
1583 }
1584 
1585 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
1586   return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1587     return MO.isReg() &&
1588            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
1589   });
1590 }
1591 
1592 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
1593   return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
1594     return !MO.isReg() ||
1595            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
1596   });
1597 }
1598 
1599 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
1600   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
1601   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
1602   return all_of(Mask, [](int Elt) { return Elt < 0; });
1603 }
1604 
1605 bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
1606   assert(MI.getOpcode() == TargetOpcode::G_STORE);
1607   return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
1608                       MRI);
1609 }
1610 
1611 bool CombinerHelper::eraseInst(MachineInstr &MI) {
1612   MI.eraseFromParent();
1613   return true;
1614 }
1615 
1616 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
1617                                     const MachineOperand &MOP2) {
1618   if (!MOP1.isReg() || !MOP2.isReg())
1619     return false;
1620   MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
1621   if (!I1)
1622     return false;
1623   MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
1624   if (!I2)
1625     return false;
1626 
1627   // Handle a case like this:
1628   //
1629   // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
1630   //
1631   // Even though %0 and %1 are produced by the same instruction they are not
1632   // the same values.
1633   if (I1 == I2)
1634     return MOP1.getReg() == MOP2.getReg();
1635 
1636   // If we have an instruction which loads or stores, we can't guarantee that
1637   // it is identical.
1638   //
1639   // For example, we may have
1640   //
1641   // %x1 = G_LOAD %addr (load N from @somewhere)
1642   // ...
1643   // call @foo
1644   // ...
1645   // %x2 = G_LOAD %addr (load N from @somewhere)
1646   // ...
1647   // %or = G_OR %x1, %x2
1648   //
1649   // It's possible that @foo will modify whatever lives at the address we're
1650   // loading from. To be safe, let's just assume that all loads and stores
1651   // are different (unless we have something which is guaranteed to not
1652   // change.)
1653   if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
1654     return false;
1655 
1656   // Check for physical registers on the instructions first to avoid cases
1657   // like this:
1658   //
1659   // %a = COPY $physreg
1660   // ...
1661   // SOMETHING implicit-def $physreg
1662   // ...
1663   // %b = COPY $physreg
1664   //
1665   // These copies are not equivalent.
1666   if (any_of(I1->uses(), [](const MachineOperand &MO) {
1667         return MO.isReg() && MO.getReg().isPhysical();
1668       })) {
1669     // Check if we have a case like this:
1670     //
1671     // %a = COPY $physreg
1672     // %b = COPY %a
1673     //
1674     // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
1675     // From that, we know that they must have the same value, since they must
1676     // have come from the same COPY.
1677     return I1->isIdenticalTo(*I2);
1678   }
1679 
1680   // We don't have any physical registers, so we don't necessarily need the
1681   // same vreg defs.
1682   //
1683   // On the off-chance that there's some target instruction feeding into the
1684   // instruction, let's use produceSameValue instead of isIdenticalTo.
1685   return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
1686 }
1687 
1688 bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
1689   if (!MOP.isReg())
1690     return false;
1691   // MIPatternMatch doesn't let us look through G_ZEXT etc.
1692   auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
1693   return ValAndVReg && ValAndVReg->Value == C;
1694 }
1695 
1696 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
1697                                                      unsigned OpIdx) {
1698   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
1699   Register OldReg = MI.getOperand(0).getReg();
1700   Register Replacement = MI.getOperand(OpIdx).getReg();
1701   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
1702   MI.eraseFromParent();
1703   replaceRegWith(MRI, OldReg, Replacement);
1704   return true;
1705 }
1706 
1707 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
1708   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
1709   // Match (cond ? x : x)
1710   return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
1711          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
1712                        MRI);
1713 }
1714 
1715 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
1716   return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
1717          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
1718                        MRI);
1719 }
1720 
1721 bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
1722   return matchConstantOp(MI.getOperand(OpIdx), 0) &&
1723          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
1724                        MRI);
1725 }
1726 
1727 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
1728   assert(MI.getNumDefs() == 1 && "Expected only one def?");
1729   Builder.setInstr(MI);
1730   Builder.buildFConstant(MI.getOperand(0), C);
1731   MI.eraseFromParent();
1732   return true;
1733 }
1734 
1735 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
1736   assert(MI.getNumDefs() == 1 && "Expected only one def?");
1737   Builder.setInstr(MI);
1738   Builder.buildConstant(MI.getOperand(0), C);
1739   MI.eraseFromParent();
1740   return true;
1741 }
1742 
1743 bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
1744   assert(MI.getNumDefs() == 1 && "Expected only one def?");
1745   Builder.setInstr(MI);
1746   Builder.buildUndef(MI.getOperand(0));
1747   MI.eraseFromParent();
1748   return true;
1749 }
1750 
1751 bool CombinerHelper::matchSimplifyAddToSub(
1752     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
1753   Register LHS = MI.getOperand(1).getReg();
1754   Register RHS = MI.getOperand(2).getReg();
1755   Register &NewLHS = std::get<0>(MatchInfo);
1756   Register &NewRHS = std::get<1>(MatchInfo);
1757 
1758   // Helper lambda to check for opportunities for
1759   // ((0-A) + B) -> B - A
1760   // (A + (0-B)) -> A - B
1761   auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
1762     int64_t Cst;
1763     if (!mi_match(MaybeSub, MRI, m_GSub(m_ICst(Cst), m_Reg(NewRHS))) ||
1764         Cst != 0)
1765       return false;
1766     NewLHS = MaybeNewLHS;
1767     return true;
1768   };
1769 
1770   return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
1771 }
1772 
1773 bool CombinerHelper::applySimplifyAddToSub(
1774     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
1775   Builder.setInstr(MI);
1776   Register SubLHS, SubRHS;
1777   std::tie(SubLHS, SubRHS) = MatchInfo;
1778   Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
1779   MI.eraseFromParent();
1780   return true;
1781 }
1782 
1783 bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
1784     MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
1785   // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
1786   //
1787   // Creates the new hand + logic instruction (but does not insert them.)
1788   //
1789   // On success, MatchInfo is populated with the new instructions. These are
1790   // inserted in applyHoistLogicOpWithSameOpcodeHands.
1791   unsigned LogicOpcode = MI.getOpcode();
1792   assert(LogicOpcode == TargetOpcode::G_AND ||
1793          LogicOpcode == TargetOpcode::G_OR ||
1794          LogicOpcode == TargetOpcode::G_XOR);
1795   MachineIRBuilder MIB(MI);
1796   Register Dst = MI.getOperand(0).getReg();
1797   Register LHSReg = MI.getOperand(1).getReg();
1798   Register RHSReg = MI.getOperand(2).getReg();
1799 
1800   // Don't recompute anything.
1801   if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
1802     return false;
1803 
1804   // Make sure we have (hand x, ...), (hand y, ...)
1805   MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
1806   MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
1807   if (!LeftHandInst || !RightHandInst)
1808     return false;
1809   unsigned HandOpcode = LeftHandInst->getOpcode();
1810   if (HandOpcode != RightHandInst->getOpcode())
1811     return false;
1812   if (!LeftHandInst->getOperand(1).isReg() ||
1813       !RightHandInst->getOperand(1).isReg())
1814     return false;
1815 
1816   // Make sure the types match up, and if we're doing this post-legalization,
1817   // we end up with legal types.
1818   Register X = LeftHandInst->getOperand(1).getReg();
1819   Register Y = RightHandInst->getOperand(1).getReg();
1820   LLT XTy = MRI.getType(X);
1821   LLT YTy = MRI.getType(Y);
1822   if (XTy != YTy)
1823     return false;
1824   if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
1825     return false;
1826 
1827   // Optional extra source register.
1828   Register ExtraHandOpSrcReg;
1829   switch (HandOpcode) {
1830   default:
1831     return false;
1832   case TargetOpcode::G_ANYEXT:
1833   case TargetOpcode::G_SEXT:
1834   case TargetOpcode::G_ZEXT: {
1835     // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
1836     break;
1837   }
1838   case TargetOpcode::G_AND:
1839   case TargetOpcode::G_ASHR:
1840   case TargetOpcode::G_LSHR:
1841   case TargetOpcode::G_SHL: {
1842     // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
1843     MachineOperand &ZOp = LeftHandInst->getOperand(2);
1844     if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
1845       return false;
1846     ExtraHandOpSrcReg = ZOp.getReg();
1847     break;
1848   }
1849   }
1850 
1851   // Record the steps to build the new instructions.
1852   //
1853   // Steps to build (logic x, y)
1854   auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
1855   OperandBuildSteps LogicBuildSteps = {
1856       [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
1857       [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
1858       [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
1859   InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
1860 
1861   // Steps to build hand (logic x, y), ...z
1862   OperandBuildSteps HandBuildSteps = {
1863       [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
1864       [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
1865   if (ExtraHandOpSrcReg.isValid())
1866     HandBuildSteps.push_back(
1867         [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
1868   InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
1869 
1870   MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
1871   return true;
1872 }
1873 
1874 bool CombinerHelper::applyBuildInstructionSteps(
1875     MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
1876   assert(MatchInfo.InstrsToBuild.size() &&
1877          "Expected at least one instr to build?");
1878   Builder.setInstr(MI);
1879   for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
1880     assert(InstrToBuild.Opcode && "Expected a valid opcode?");
1881     assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
1882     MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
1883     for (auto &OperandFn : InstrToBuild.OperandFns)
1884       OperandFn(Instr);
1885   }
1886   MI.eraseFromParent();
1887   return true;
1888 }
1889 
1890 bool CombinerHelper::tryCombine(MachineInstr &MI) {
1891   if (tryCombineCopy(MI))
1892     return true;
1893   if (tryCombineExtendingLoads(MI))
1894     return true;
1895   if (tryCombineIndexedLoadStore(MI))
1896     return true;
1897   return false;
1898 }
1899