//===-- lib/CodeGen/GlobalISel/CombinerHelper.cpp -------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/ADT/SetVector.h"
10 #include "llvm/ADT/SmallBitVector.h"
11 #include "llvm/CodeGen/GlobalISel/Combiner.h"
12 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
13 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
14 #include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h"
15 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
16 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
17 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
18 #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h"
19 #include "llvm/CodeGen/GlobalISel/Utils.h"
20 #include "llvm/CodeGen/LowLevelType.h"
21 #include "llvm/CodeGen/MachineBasicBlock.h"
22 #include "llvm/CodeGen/MachineDominators.h"
23 #include "llvm/CodeGen/MachineFrameInfo.h"
24 #include "llvm/CodeGen/MachineInstr.h"
25 #include "llvm/CodeGen/MachineMemOperand.h"
26 #include "llvm/CodeGen/MachineRegisterInfo.h"
27 #include "llvm/CodeGen/TargetInstrInfo.h"
28 #include "llvm/CodeGen/TargetLowering.h"
29 #include "llvm/CodeGen/TargetOpcodes.h"
30 #include "llvm/Support/MathExtras.h"
31 #include "llvm/Target/TargetMachine.h"
32 #include <tuple>
33 
34 #define DEBUG_TYPE "gi-combiner"
35 
36 using namespace llvm;
37 using namespace MIPatternMatch;
38 
39 // Option to allow testing of the combiner while no targets know about indexed
40 // addressing.
41 static cl::opt<bool>
42     ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
43                        cl::desc("Force all indexed operations to be "
44                                 "legal for the GlobalISel combiner"));
45 
46 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
47                                MachineIRBuilder &B, GISelKnownBits *KB,
48                                MachineDominatorTree *MDT,
49                                const LegalizerInfo *LI)
50     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer), KB(KB),
51       MDT(MDT), LI(LI), RBI(Builder.getMF().getSubtarget().getRegBankInfo()),
52       TRI(Builder.getMF().getSubtarget().getRegisterInfo()) {
53   (void)this->KB;
54 }
55 
56 const TargetLowering &CombinerHelper::getTargetLowering() const {
57   return *Builder.getMF().getSubtarget().getTargetLowering();
58 }
59 
60 /// \returns The little endian in-memory byte position of byte \p I in a
61 /// \p ByteWidth bytes wide type.
62 ///
63 /// E.g. Given a 4-byte type x, x[0] -> byte 0
64 static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
65   assert(I < ByteWidth && "I must be in [0, ByteWidth)");
66   return I;
67 }
68 
69 /// \returns The big endian in-memory byte position of byte \p I in a
70 /// \p ByteWidth bytes wide type.
71 ///
72 /// E.g. Given a 4-byte type x, x[0] -> byte 3
73 static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
74   assert(I < ByteWidth && "I must be in [0, ByteWidth)");
75   return ByteWidth - I - 1;
76 }
77 
78 /// Given a map from byte offsets in memory to indices in a load/store,
79 /// determine if that map corresponds to a little or big endian byte pattern.
80 ///
/// \param MemOffset2Idx maps memory offsets to indices in the load/store.
82 /// \param LowestIdx is the lowest index in \p MemOffset2Idx.
83 ///
84 /// \returns true if the map corresponds to a big endian byte pattern, false
85 /// if it corresponds to a little endian byte pattern, and None otherwise.
86 ///
87 /// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
88 /// are as follows:
89 ///
90 /// AddrOffset   Little endian    Big endian
91 /// 0            0                3
92 /// 1            1                2
93 /// 2            2                1
94 /// 3            3                0
95 static Optional<bool>
96 isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
97             int64_t LowestIdx) {
98   // Need at least two byte positions to decide on endianness.
99   unsigned Width = MemOffset2Idx.size();
100   if (Width < 2)
101     return None;
102   bool BigEndian = true, LittleEndian = true;
  for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
104     auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
105     if (MemOffsetAndIdx == MemOffset2Idx.end())
106       return None;
107     const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
108     assert(Idx >= 0 && "Expected non-negative byte offset?");
109     LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
110     BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
111     if (!BigEndian && !LittleEndian)
112       return None;
113   }
114 
115   assert((BigEndian != LittleEndian) &&
116          "Pattern cannot be both big and little endian!");
117   return BigEndian;
118 }
119 
120 bool CombinerHelper::isLegalOrBeforeLegalizer(
121     const LegalityQuery &Query) const {
122   return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
123 }
124 
125 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
126                                     Register ToReg) const {
127   Observer.changingAllUsesOfReg(MRI, FromReg);
128 
129   if (MRI.constrainRegAttrs(ToReg, FromReg))
130     MRI.replaceRegWith(FromReg, ToReg);
131   else
132     Builder.buildCopy(ToReg, FromReg);
133 
134   Observer.finishedChangingAllUsesOfReg();
135 }
136 
137 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
138                                       MachineOperand &FromRegOp,
139                                       Register ToReg) const {
140   assert(FromRegOp.getParent() && "Expected an operand in an MI");
141   Observer.changingInstr(*FromRegOp.getParent());
142 
143   FromRegOp.setReg(ToReg);
144 
145   Observer.changedInstr(*FromRegOp.getParent());
146 }
147 
148 const RegisterBank *CombinerHelper::getRegBank(Register Reg) const {
149   return RBI->getRegBank(Reg, MRI, *TRI);
150 }
151 
152 void CombinerHelper::setRegBank(Register Reg, const RegisterBank *RegBank) {
153   if (RegBank)
154     MRI.setRegBank(Reg, *RegBank);
155 }
156 
157 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
158   if (matchCombineCopy(MI)) {
159     applyCombineCopy(MI);
160     return true;
161   }
162   return false;
163 }
164 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
165   if (MI.getOpcode() != TargetOpcode::COPY)
166     return false;
167   Register DstReg = MI.getOperand(0).getReg();
168   Register SrcReg = MI.getOperand(1).getReg();
169   return canReplaceReg(DstReg, SrcReg, MRI);
170 }
171 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
172   Register DstReg = MI.getOperand(0).getReg();
173   Register SrcReg = MI.getOperand(1).getReg();
174   MI.eraseFromParent();
175   replaceRegWith(MRI, DstReg, SrcReg);
176 }
177 
178 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
179   bool IsUndef = false;
180   SmallVector<Register, 4> Ops;
181   if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
182     applyCombineConcatVectors(MI, IsUndef, Ops);
183     return true;
184   }
185   return false;
186 }
187 
188 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
189                                                SmallVectorImpl<Register> &Ops) {
190   assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
191          "Invalid instruction");
192   IsUndef = true;
193   MachineInstr *Undef = nullptr;
194 
195   // Walk over all the operands of concat vectors and check if they are
196   // build_vector themselves or undef.
197   // Then collect their operands in Ops.
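  //
  // E.g. (register names illustrative):
  //   %v1:_(<2 x s32>) = G_BUILD_VECTOR %a(s32), %b(s32)
  //   %v2:_(<2 x s32>) = G_IMPLICIT_DEF
  //   %cat:_(<4 x s32>) = G_CONCAT_VECTORS %v1(<2 x s32>), %v2(<2 x s32>)
  // collects Ops = {%a, %b, %undef, %undef}, where %undef is a single scalar
  // G_IMPLICIT_DEF shared by all undef elements.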
198   for (const MachineOperand &MO : MI.uses()) {
199     Register Reg = MO.getReg();
200     MachineInstr *Def = MRI.getVRegDef(Reg);
201     assert(Def && "Operand not defined");
202     switch (Def->getOpcode()) {
203     case TargetOpcode::G_BUILD_VECTOR:
204       IsUndef = false;
205       // Remember the operands of the build_vector to fold
206       // them into the yet-to-build flattened concat vectors.
207       for (const MachineOperand &BuildVecMO : Def->uses())
208         Ops.push_back(BuildVecMO.getReg());
209       break;
210     case TargetOpcode::G_IMPLICIT_DEF: {
211       LLT OpType = MRI.getType(Reg);
212       // Keep one undef value for all the undef operands.
213       if (!Undef) {
214         Builder.setInsertPt(*MI.getParent(), MI);
215         Undef = Builder.buildUndef(OpType.getScalarType());
216       }
217       assert(MRI.getType(Undef->getOperand(0).getReg()) ==
218                  OpType.getScalarType() &&
219              "All undefs should have the same type");
      // Break the undef vector into as many scalar elements as needed
      // for the flattening.
222       for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
223            EltIdx != EltEnd; ++EltIdx)
224         Ops.push_back(Undef->getOperand(0).getReg());
225       break;
226     }
227     default:
228       return false;
229     }
230   }
231   return true;
232 }
233 void CombinerHelper::applyCombineConcatVectors(
234     MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
  // Generate the flattened build_vector.
237   Register DstReg = MI.getOperand(0).getReg();
238   Builder.setInsertPt(*MI.getParent(), MI);
239   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
240 
  // Note: IsUndef is sort of redundant. We could have determined it by
  // checking that all Ops are undef.  Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up.  For now, given we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // right thing.
247   if (IsUndef)
248     Builder.buildUndef(NewDstReg);
249   else
250     Builder.buildBuildVector(NewDstReg, Ops);
251   MI.eraseFromParent();
252   replaceRegWith(MRI, DstReg, NewDstReg);
253 }
254 
255 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
256   SmallVector<Register, 4> Ops;
257   if (matchCombineShuffleVector(MI, Ops)) {
258     applyCombineShuffleVector(MI, Ops);
259     return true;
260   }
261   return false;
262 }
263 
264 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
265                                                SmallVectorImpl<Register> &Ops) {
266   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
267          "Invalid instruction kind");
268   LLT DstType = MRI.getType(MI.getOperand(0).getReg());
269   Register Src1 = MI.getOperand(1).getReg();
270   LLT SrcType = MRI.getType(Src1);
271   // As bizarre as it may look, shuffle vector can actually produce
272   // scalar! This is because at the IR level a <1 x ty> shuffle
273   // vector is perfectly valid.
274   unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
275   unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
276 
  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to replace the
  // shuffle vector with a concat_vectors.
280   //
281   // Note: We may still be able to produce a concat_vectors fed by
282   //       extract_vector_elt and so on. It is less clear that would
283   //       be better though, so don't bother for now.
284   //
  // If the destination is a scalar, the size of the sources doesn't
  // matter. We will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size. But
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match,
  //       we could still emit an extract vector element in that case.
292   if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
293     return false;
294 
295   // Check that the shuffle mask can be broken evenly between the
296   // different sources.
297   if (DstNumElts % SrcNumElts != 0)
298     return false;
299 
300   // Mask length is a multiple of the source vector length.
301   // Check if the shuffle is some kind of concatenation of the input
302   // vectors.
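  //
  // E.g. with <2 x s32> sources (register names illustrative):
  //   %dst:_(<4 x s32>) = G_SHUFFLE_VECTOR %src1, %src2,
  //                                        shufflemask(0, 1, 2, 3)
  // each SrcNumElts-wide piece of the mask takes consecutive elements from a
  // single source, so this is equivalent to concatenating %src1 and %src2.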
303   unsigned NumConcat = DstNumElts / SrcNumElts;
304   SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
305   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
306   for (unsigned i = 0; i != DstNumElts; ++i) {
307     int Idx = Mask[i];
308     // Undef value.
309     if (Idx < 0)
310       continue;
311     // Ensure the indices in each SrcType sized piece are sequential and that
312     // the same source is used for the whole piece.
313     if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
314         (ConcatSrcs[i / SrcNumElts] >= 0 &&
315          ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
316       return false;
317     // Remember which source this index came from.
318     ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
319   }
320 
321   // The shuffle is concatenating multiple vectors together.
322   // Collect the different operands for that.
323   Register UndefReg;
324   Register Src2 = MI.getOperand(2).getReg();
325   for (auto Src : ConcatSrcs) {
326     if (Src < 0) {
327       if (!UndefReg) {
328         Builder.setInsertPt(*MI.getParent(), MI);
329         UndefReg = Builder.buildUndef(SrcType).getReg(0);
330       }
331       Ops.push_back(UndefReg);
332     } else if (Src == 0)
333       Ops.push_back(Src1);
334     else
335       Ops.push_back(Src2);
336   }
337   return true;
338 }
339 
340 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
341                                                const ArrayRef<Register> Ops) {
342   Register DstReg = MI.getOperand(0).getReg();
343   Builder.setInsertPt(*MI.getParent(), MI);
344   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
345 
346   if (Ops.size() == 1)
347     Builder.buildCopy(NewDstReg, Ops[0]);
348   else
349     Builder.buildMerge(NewDstReg, Ops);
350 
351   MI.eraseFromParent();
352   replaceRegWith(MRI, DstReg, NewDstReg);
353 }
354 
355 namespace {
356 
/// Select a preference between two uses. CurrentUse is the current preference
/// while the *ForCandidate arguments describe the candidate under
/// consideration.
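///
/// The rules, in priority order: a defined extension beats G_ANYEXT, G_SEXT
/// beats G_ZEXT for the same type, and otherwise the larger type wins since
/// G_TRUNC is usually free. E.g. an s32 G_SEXT candidate is preferred over a
/// current s16 G_ZEXT preference.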
359 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
360                                   const LLT TyForCandidate,
361                                   unsigned OpcodeForCandidate,
362                                   MachineInstr *MIForCandidate) {
363   if (!CurrentUse.Ty.isValid()) {
364     if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
365         CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
366       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
367     return CurrentUse;
368   }
369 
370   // We permit the extend to hoist through basic blocks but this is only
371   // sensible if the target has extending loads. If you end up lowering back
372   // into a load and extend during the legalizer then the end result is
373   // hoisting the extend up to the load.
374 
375   // Prefer defined extensions to undefined extensions as these are more
376   // likely to reduce the number of instructions.
377   if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
378       CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
379     return CurrentUse;
380   else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
381            OpcodeForCandidate != TargetOpcode::G_ANYEXT)
382     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
383 
  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive, so it is better to fold them into the load.
386   if (CurrentUse.Ty == TyForCandidate) {
387     if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
388         OpcodeForCandidate == TargetOpcode::G_ZEXT)
389       return CurrentUse;
390     else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
391              OpcodeForCandidate == TargetOpcode::G_SEXT)
392       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
393   }
394 
395   // This is potentially target specific. We've chosen the largest type
396   // because G_TRUNC is usually free. One potential catch with this is that
397   // some targets have a reduced number of larger registers than smaller
398   // registers and this choice potentially increases the live-range for the
399   // larger value.
400   if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
401     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
402   }
403   return CurrentUse;
404 }
405 
406 /// Find a suitable place to insert some instructions and insert them. This
407 /// function accounts for special cases like inserting before a PHI node.
408 /// The current strategy for inserting before PHI's is to duplicate the
409 /// instructions for each predecessor. However, while that's ok for G_TRUNC
410 /// on most targets since it generally requires no code, other targets/cases may
411 /// want to try harder to find a dominating block.
412 static void InsertInsnsWithoutSideEffectsBeforeUse(
413     MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
414     std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
415                        MachineOperand &UseMO)>
416         Inserter) {
417   MachineInstr &UseMI = *UseMO.getParent();
418 
419   MachineBasicBlock *InsertBB = UseMI.getParent();
420 
421   // If the use is a PHI then we want the predecessor block instead.
422   if (UseMI.isPHI()) {
423     MachineOperand *PredBB = std::next(&UseMO);
424     InsertBB = PredBB->getMBB();
425   }
426 
427   // If the block is the same block as the def then we want to insert just after
428   // the def instead of at the start of the block.
429   if (InsertBB == DefMI.getParent()) {
430     MachineBasicBlock::iterator InsertPt = &DefMI;
431     Inserter(InsertBB, std::next(InsertPt), UseMO);
432     return;
433   }
434 
435   // Otherwise we want the start of the BB
436   Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
437 }
438 } // end anonymous namespace
439 
440 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
441   PreferredTuple Preferred;
442   if (matchCombineExtendingLoads(MI, Preferred)) {
443     applyCombineExtendingLoads(MI, Preferred);
444     return true;
445   }
446   return false;
447 }
448 
449 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
450                                                 PreferredTuple &Preferred) {
451   // We match the loads and follow the uses to the extend instead of matching
452   // the extends and following the def to the load. This is because the load
453   // must remain in the same position for correctness (unless we also add code
454   // to find a safe place to sink it) whereas the extend is freely movable.
455   // It also prevents us from duplicating the load for the volatile case or just
456   // for performance.
457   GAnyLoad *LoadMI = dyn_cast<GAnyLoad>(&MI);
458   if (!LoadMI)
459     return false;
460 
461   Register LoadReg = LoadMI->getDstReg();
462 
463   LLT LoadValueTy = MRI.getType(LoadReg);
464   if (!LoadValueTy.isScalar())
465     return false;
466 
467   // Most architectures are going to legalize <s8 loads into at least a 1 byte
468   // load, and the MMOs can only describe memory accesses in multiples of bytes.
469   // If we try to perform extload combining on those, we can end up with
470   // %a(s8) = extload %ptr (load 1 byte from %ptr)
471   // ... which is an illegal extload instruction.
472   if (LoadValueTy.getSizeInBits() < 8)
473     return false;
474 
  // Non-power-of-2 types will very likely be legalized into multiple loads.
  // Don't bother trying to match them into extending loads.
477   if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
478     return false;
479 
  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type and
  // emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
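  //
  // E.g. given (illustrative):
  //   %ld:_(s8) = G_LOAD %ptr
  //   %a:_(s32) = G_SEXT %ld(s8)
  //   %b:_(s16) = G_ZEXT %ld(s8)
  // the preferred use is the s32 G_SEXT, so the load becomes an s32
  // G_SEXTLOAD and %b is rebuilt as a G_ZEXT of a G_TRUNC of the new load's
  // result.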
485   unsigned PreferredOpcode =
486       isa<GLoad>(&MI)
487           ? TargetOpcode::G_ANYEXT
488           : isa<GSExtLoad>(&MI) ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
489   Preferred = {LLT(), PreferredOpcode, nullptr};
490   for (auto &UseMI : MRI.use_nodbg_instructions(LoadReg)) {
491     if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
492         UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
493         (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
494       const auto &MMO = LoadMI->getMMO();
495       // For atomics, only form anyextending loads.
496       if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
497         continue;
498       // Check for legality.
499       if (LI) {
500         LegalityQuery::MemDesc MMDesc;
501         MMDesc.MemoryTy = MMO.getMemoryType();
502         MMDesc.AlignInBits = MMO.getAlign().value() * 8;
503         MMDesc.Ordering = MMO.getSuccessOrdering();
504         LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
505         LLT SrcTy = MRI.getType(LoadMI->getPointerReg());
506         if (LI->getAction({LoadMI->getOpcode(), {UseTy, SrcTy}, {MMDesc}})
507                 .Action != LegalizeActions::Legal)
508           continue;
509       }
510       Preferred = ChoosePreferredUse(Preferred,
511                                      MRI.getType(UseMI.getOperand(0).getReg()),
512                                      UseMI.getOpcode(), &UseMI);
513     }
514   }
515 
516   // There were no extends
517   if (!Preferred.MI)
518     return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
521   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
522 
523   LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
524   return true;
525 }
526 
527 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
528                                                 PreferredTuple &Preferred) {
529   // Rewrite the load to the chosen extending load.
530   Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
531 
532   // Inserter to insert a truncate back to the original type at a given point
533   // with some basic CSE to limit truncate duplication to one per BB.
534   DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
535   auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
536                            MachineBasicBlock::iterator InsertBefore,
537                            MachineOperand &UseMO) {
538     MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
539     if (PreviouslyEmitted) {
540       Observer.changingInstr(*UseMO.getParent());
541       UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
542       Observer.changedInstr(*UseMO.getParent());
543       return;
544     }
545 
546     Builder.setInsertPt(*InsertIntoBB, InsertBefore);
547     Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
548     MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
549     EmittedInsns[InsertIntoBB] = NewMI;
550     replaceRegOpWith(MRI, UseMO, NewDstReg);
551   };
552 
553   Observer.changingInstr(MI);
554   MI.setDesc(
555       Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
556                                ? TargetOpcode::G_SEXTLOAD
557                                : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
558                                      ? TargetOpcode::G_ZEXTLOAD
559                                      : TargetOpcode::G_LOAD));
560 
561   // Rewrite all the uses to fix up the types.
562   auto &LoadValue = MI.getOperand(0);
563   SmallVector<MachineOperand *, 4> Uses;
564   for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
565     Uses.push_back(&UseMO);
566 
567   for (auto *UseMO : Uses) {
568     MachineInstr *UseMI = UseMO->getParent();
569 
570     // If the extend is compatible with the preferred extend then we should fix
571     // up the type and extend so that it uses the preferred use.
572     if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
573         UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
574       Register UseDstReg = UseMI->getOperand(0).getReg();
575       MachineOperand &UseSrcMO = UseMI->getOperand(1);
576       const LLT UseDstTy = MRI.getType(UseDstReg);
577       if (UseDstReg != ChosenDstReg) {
578         if (Preferred.Ty == UseDstTy) {
579           // If the use has the same type as the preferred use, then merge
580           // the vregs and erase the extend. For example:
581           //    %1:_(s8) = G_LOAD ...
582           //    %2:_(s32) = G_SEXT %1(s8)
583           //    %3:_(s32) = G_ANYEXT %1(s8)
584           //    ... = ... %3(s32)
585           // rewrites to:
586           //    %2:_(s32) = G_SEXTLOAD ...
587           //    ... = ... %2(s32)
588           replaceRegWith(MRI, UseDstReg, ChosenDstReg);
589           Observer.erasingInstr(*UseMO->getParent());
590           UseMO->getParent()->eraseFromParent();
591         } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
592           // If the preferred size is smaller, then keep the extend but extend
593           // from the result of the extending load. For example:
594           //    %1:_(s8) = G_LOAD ...
595           //    %2:_(s32) = G_SEXT %1(s8)
596           //    %3:_(s64) = G_ANYEXT %1(s8)
597           //    ... = ... %3(s64)
          // rewrites to:
599           //    %2:_(s32) = G_SEXTLOAD ...
600           //    %3:_(s64) = G_ANYEXT %2:_(s32)
601           //    ... = ... %3(s64)
602           replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
603         } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          //    ... = ... %3(s32)
615           InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
616                                                  InsertTruncAt);
617         }
618         continue;
619       }
620       // The use is (one of) the uses of the preferred use we chose earlier.
621       // We're going to update the load to def this value later so just erase
622       // the old extend.
623       Observer.erasingInstr(*UseMO->getParent());
624       UseMO->getParent()->eraseFromParent();
625       continue;
626     }
627 
628     // The use isn't an extend. Truncate back to the type we originally loaded.
629     // This is free on many targets.
630     InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
631   }
632 
633   MI.getOperand(0).setReg(ChosenDstReg);
634   Observer.changedInstr(MI);
635 }
636 
637 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
638                                    const MachineInstr &UseMI) {
639   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
640          "shouldn't consider debug uses");
641   assert(DefMI.getParent() == UseMI.getParent());
642   if (&DefMI == &UseMI)
643     return false;
644   const MachineBasicBlock &MBB = *DefMI.getParent();
645   auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
646     return &MI == &DefMI || &MI == &UseMI;
647   });
648   if (DefOrUse == MBB.end())
649     llvm_unreachable("Block must contain both DefMI and UseMI!");
650   return &*DefOrUse == &DefMI;
651 }
652 
653 bool CombinerHelper::dominates(const MachineInstr &DefMI,
654                                const MachineInstr &UseMI) {
655   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
656          "shouldn't consider debug uses");
657   if (MDT)
658     return MDT->dominates(&DefMI, &UseMI);
659   else if (DefMI.getParent() != UseMI.getParent())
660     return false;
661 
662   return isPredecessor(DefMI, UseMI);
663 }
664 
665 bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
666   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
667   Register SrcReg = MI.getOperand(1).getReg();
668   Register LoadUser = SrcReg;
669 
670   if (MRI.getType(SrcReg).isVector())
671     return false;
672 
673   Register TruncSrc;
674   if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
675     LoadUser = TruncSrc;
676 
677   uint64_t SizeInBits = MI.getOperand(2).getImm();
678   // If the source is a G_SEXTLOAD from the same bit width, then we don't
679   // need any extend at all, just a truncate.
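  //
  // E.g.:
  //   %ld:_(s32) = G_SEXTLOAD %ptr (load 1)
  //   %ext:_(s32) = G_SEXT_INREG %ld, 8
  // the load has already sign-extended from 8 bits, so %ext can simply be
  // replaced with a copy of %ld.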
680   if (auto *LoadMI = getOpcodeDef<GSExtLoad>(LoadUser, MRI)) {
681     // If truncating more than the original extended value, abort.
682     auto LoadSizeBits = LoadMI->getMemSizeInBits();
683     if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < LoadSizeBits)
684       return false;
685     if (LoadSizeBits == SizeInBits)
686       return true;
687   }
688   return false;
689 }
690 
691 void CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
692   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
693   Builder.setInstrAndDebugLoc(MI);
694   Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
695   MI.eraseFromParent();
696 }
697 
698 bool CombinerHelper::matchSextInRegOfLoad(
699     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
700   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
701 
702   // Only supports scalars for now.
703   if (MRI.getType(MI.getOperand(0).getReg()).isVector())
704     return false;
705 
706   Register SrcReg = MI.getOperand(1).getReg();
707   auto *LoadDef = getOpcodeDef<GLoad>(SrcReg, MRI);
708   if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()) ||
709       !LoadDef->isSimple())
710     return false;
711 
712   // If the sign extend extends from a narrower width than the load's width,
713   // then we can narrow the load width when we combine to a G_SEXTLOAD.
714   // Avoid widening the load at all.
715   unsigned NewSizeBits = std::min((uint64_t)MI.getOperand(2).getImm(),
716                                   LoadDef->getMemSizeInBits());
717 
718   // Don't generate G_SEXTLOADs with a < 1 byte width.
719   if (NewSizeBits < 8)
720     return false;
721   // Don't bother creating a non-power-2 sextload, it will likely be broken up
722   // anyway for most targets.
723   if (!isPowerOf2_32(NewSizeBits))
724     return false;
725 
726   const MachineMemOperand &MMO = LoadDef->getMMO();
727   LegalityQuery::MemDesc MMDesc;
728   MMDesc.MemoryTy = LLT::scalar(NewSizeBits);
729   MMDesc.AlignInBits = MMO.getAlign().value() * 8;
730   MMDesc.Ordering = MMO.getSuccessOrdering();
731   if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SEXTLOAD,
732                                  {MRI.getType(LoadDef->getDstReg()),
733                                   MRI.getType(LoadDef->getPointerReg())},
734                                  {MMDesc}}))
735     return false;
736 
737   MatchInfo = std::make_tuple(LoadDef->getDstReg(), NewSizeBits);
738   return true;
739 }
740 
741 void CombinerHelper::applySextInRegOfLoad(
742     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
743   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
744   Register LoadReg;
745   unsigned ScalarSizeBits;
746   std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
747   GLoad *LoadDef = cast<GLoad>(MRI.getVRegDef(LoadReg));
748 
749   // If we have the following:
750   // %ld = G_LOAD %ptr, (load 2)
751   // %ext = G_SEXT_INREG %ld, 8
752   //    ==>
753   // %ld = G_SEXTLOAD %ptr (load 1)
754 
755   auto &MMO = LoadDef->getMMO();
756   Builder.setInstrAndDebugLoc(*LoadDef);
757   auto &MF = Builder.getMF();
758   auto PtrInfo = MMO.getPointerInfo();
759   auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
760   Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
761                          LoadDef->getPointerReg(), *NewMMO);
762   MI.eraseFromParent();
763 }
764 
765 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
766                                             Register &Base, Register &Offset) {
767   auto &MF = *MI.getParent()->getParent();
768   const auto &TLI = *MF.getSubtarget().getTargetLowering();
769 
770 #ifndef NDEBUG
771   unsigned Opcode = MI.getOpcode();
772   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
773          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
774 #endif
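
  // A post-indexing opportunity looks like (register names illustrative):
  //   %val:_(s32) = G_LOAD %base
  //   %next:_(p0) = G_PTR_ADD %base, %offset
  // i.e. a use of the base address that also advances it, which can be folded
  // into a single indexed operation defining both %val and %next.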
775 
776   Base = MI.getOperand(1).getReg();
777   MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
778   if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
779     return false;
780 
781   LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
  // FIXME: The following use traversal needs a bail out for pathological cases.
783   for (auto &Use : MRI.use_nodbg_instructions(Base)) {
784     if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
785       continue;
786 
787     Offset = Use.getOperand(2).getReg();
788     if (!ForceLegalIndexing &&
789         !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
790       LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
791                         << Use);
792       continue;
793     }
794 
795     // Make sure the offset calculation is before the potentially indexed op.
796     // FIXME: we really care about dependency here. The offset calculation might
797     // be movable.
798     MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
799     if (!OffsetDef || !dominates(*OffsetDef, MI)) {
800       LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
801                         << Use);
802       continue;
803     }
804 
805     // FIXME: check whether all uses of Base are load/store with foldable
806     // addressing modes. If so, using the normal addr-modes is better than
807     // forming an indexed one.
808 
809     bool MemOpDominatesAddrUses = true;
810     for (auto &PtrAddUse :
811          MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
812       if (!dominates(MI, PtrAddUse)) {
813         MemOpDominatesAddrUses = false;
814         break;
815       }
816     }
817 
818     if (!MemOpDominatesAddrUses) {
819       LLVM_DEBUG(
820           dbgs() << "    Ignoring candidate as memop does not dominate uses: "
821                  << Use);
822       continue;
823     }
824 
825     LLVM_DEBUG(dbgs() << "    Found match: " << Use);
826     Addr = Use.getOperand(0).getReg();
827     return true;
828   }
829 
830   return false;
831 }
832 
833 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
834                                            Register &Base, Register &Offset) {
835   auto &MF = *MI.getParent()->getParent();
836   const auto &TLI = *MF.getSubtarget().getTargetLowering();
837 
838 #ifndef NDEBUG
839   unsigned Opcode = MI.getOpcode();
840   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
841          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
842 #endif
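
  // A pre-indexing opportunity looks like (register names illustrative):
  //   %addr:_(p0) = G_PTR_ADD %base, %offset
  //   %val:_(s32) = G_LOAD %addr
  // i.e. the accessed address is itself computed by a G_PTR_ADD, which can be
  // folded into a single indexed operation that also defines %addr.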
843 
844   Addr = MI.getOperand(1).getReg();
845   MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
846   if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
847     return false;
848 
849   Base = AddrDef->getOperand(1).getReg();
850   Offset = AddrDef->getOperand(2).getReg();
851 
852   LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
853 
854   if (!ForceLegalIndexing &&
855       !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
856     LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
857     return false;
858   }
859 
860   MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
861   if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
862     LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
863     return false;
864   }
865 
866   if (MI.getOpcode() == TargetOpcode::G_STORE) {
867     // Would require a copy.
868     if (Base == MI.getOperand(0).getReg()) {
869       LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
870       return false;
871     }
872 
873     // We're expecting one use of Addr in MI, but it could also be the
874     // value stored, which isn't actually dominated by the instruction.
875     if (MI.getOperand(0).getReg() == Addr) {
876       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
877       return false;
878     }
879   }
880 
881   // FIXME: check whether all uses of the base pointer are constant PtrAdds.
882   // That might allow us to end base's liveness here by adjusting the constant.
883 
884   for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
885     if (!dominates(MI, UseMI)) {
886       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
887       return false;
888     }
889   }
890 
891   return true;
892 }
893 
894 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
895   IndexedLoadStoreMatchInfo MatchInfo;
896   if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
897     applyCombineIndexedLoadStore(MI, MatchInfo);
898     return true;
899   }
900   return false;
901 }
902 
903 bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
904   unsigned Opcode = MI.getOpcode();
905   if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
906       Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
907     return false;
908 
909   // For now, no targets actually support these opcodes so don't waste time
910   // running these unless we're forced to for testing.
911   if (!ForceLegalIndexing)
912     return false;
913 
914   MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
915                                           MatchInfo.Offset);
916   if (!MatchInfo.IsPre &&
917       !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
918                               MatchInfo.Offset))
919     return false;
920 
921   return true;
922 }
923 
924 void CombinerHelper::applyCombineIndexedLoadStore(
925     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
926   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
927   MachineIRBuilder MIRBuilder(MI);
928   unsigned Opcode = MI.getOpcode();
929   bool IsStore = Opcode == TargetOpcode::G_STORE;
930   unsigned NewOpcode;
931   switch (Opcode) {
932   case TargetOpcode::G_LOAD:
933     NewOpcode = TargetOpcode::G_INDEXED_LOAD;
934     break;
935   case TargetOpcode::G_SEXTLOAD:
936     NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
937     break;
938   case TargetOpcode::G_ZEXTLOAD:
939     NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
940     break;
941   case TargetOpcode::G_STORE:
942     NewOpcode = TargetOpcode::G_INDEXED_STORE;
943     break;
944   default:
945     llvm_unreachable("Unknown load/store opcode");
946   }
947 
948   auto MIB = MIRBuilder.buildInstr(NewOpcode);
949   if (IsStore) {
950     MIB.addDef(MatchInfo.Addr);
951     MIB.addUse(MI.getOperand(0).getReg());
952   } else {
953     MIB.addDef(MI.getOperand(0).getReg());
954     MIB.addDef(MatchInfo.Addr);
955   }
956 
957   MIB.addUse(MatchInfo.Base);
958   MIB.addUse(MatchInfo.Offset);
959   MIB.addImm(MatchInfo.IsPre);
960   MI.eraseFromParent();
961   AddrDef.eraseFromParent();
962 
  LLVM_DEBUG(dbgs() << "    Combined to indexed operation");
964 }
965 
966 bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
967                                         MachineInstr *&OtherMI) {
968   unsigned Opcode = MI.getOpcode();
969   bool IsDiv, IsSigned;
970 
971   switch (Opcode) {
972   default:
973     llvm_unreachable("Unexpected opcode!");
974   case TargetOpcode::G_SDIV:
975   case TargetOpcode::G_UDIV: {
976     IsDiv = true;
977     IsSigned = Opcode == TargetOpcode::G_SDIV;
978     break;
979   }
980   case TargetOpcode::G_SREM:
981   case TargetOpcode::G_UREM: {
982     IsDiv = false;
983     IsSigned = Opcode == TargetOpcode::G_SREM;
984     break;
985   }
986   }
987 
988   Register Src1 = MI.getOperand(1).getReg();
989   unsigned DivOpcode, RemOpcode, DivremOpcode;
990   if (IsSigned) {
991     DivOpcode = TargetOpcode::G_SDIV;
992     RemOpcode = TargetOpcode::G_SREM;
993     DivremOpcode = TargetOpcode::G_SDIVREM;
994   } else {
995     DivOpcode = TargetOpcode::G_UDIV;
996     RemOpcode = TargetOpcode::G_UREM;
997     DivremOpcode = TargetOpcode::G_UDIVREM;
998   }
999 
1000   if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
1001     return false;
1002 
1003   // Combine:
1004   //   %div:_ = G_[SU]DIV %src1:_, %src2:_
1005   //   %rem:_ = G_[SU]REM %src1:_, %src2:_
1006   // into:
1007   //  %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1008 
1009   // Combine:
1010   //   %rem:_ = G_[SU]REM %src1:_, %src2:_
1011   //   %div:_ = G_[SU]DIV %src1:_, %src2:_
1012   // into:
1013   //  %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1014 
1015   for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1016     if (MI.getParent() == UseMI.getParent() &&
1017         ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1018          (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1019         matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) {
1020       OtherMI = &UseMI;
1021       return true;
1022     }
1023   }
1024 
1025   return false;
1026 }
1027 
1028 void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
1029                                         MachineInstr *&OtherMI) {
1030   unsigned Opcode = MI.getOpcode();
1031   assert(OtherMI && "OtherMI shouldn't be empty.");
1032 
1033   Register DestDivReg, DestRemReg;
1034   if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1035     DestDivReg = MI.getOperand(0).getReg();
1036     DestRemReg = OtherMI->getOperand(0).getReg();
1037   } else {
1038     DestDivReg = OtherMI->getOperand(0).getReg();
1039     DestRemReg = MI.getOperand(0).getReg();
1040   }
1041 
1042   bool IsSigned =
1043       Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1044 
1045   // Check which instruction is first in the block so we don't break def-use
1046   // deps by "moving" the instruction incorrectly.
1047   if (dominates(MI, *OtherMI))
1048     Builder.setInstrAndDebugLoc(MI);
1049   else
1050     Builder.setInstrAndDebugLoc(*OtherMI);
1051 
1052   Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1053                               : TargetOpcode::G_UDIVREM,
1054                      {DestDivReg, DestRemReg},
1055                      {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
1056   MI.eraseFromParent();
1057   OtherMI->eraseFromParent();
1058 }
1059 
1060 bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
1061                                                    MachineInstr *&BrCond) {
1062   assert(MI.getOpcode() == TargetOpcode::G_BR);
1063 
1064   // Try to match the following:
1065   // bb1:
1066   //   G_BRCOND %c1, %bb2
1067   //   G_BR %bb3
1068   // bb2:
1069   // ...
1070   // bb3:
1071 
  // The above pattern does not have a fall through to the successor bb2,
  // always resulting in a branch no matter which path is taken. Here we try
  // to find and replace that pattern with a conditional branch to bb3 and
  // otherwise a fallthrough to bb2. This is generally better for branch
  // predictors.
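  //
  // That is, we rewrite the block into (names illustrative):
  // bb1:
  //   %inv_c1 = G_XOR %c1, <constant true>
  //   G_BRCOND %inv_c1, %bb3
  //   G_BR %bb2
  // bb2:
  // ...
  // bb3: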
1076 
1077   MachineBasicBlock *MBB = MI.getParent();
1078   MachineBasicBlock::iterator BrIt(MI);
1079   if (BrIt == MBB->begin())
1080     return false;
1081   assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1082 
1083   BrCond = &*std::prev(BrIt);
1084   if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1085     return false;
1086 
1087   // Check that the next block is the conditional branch target. Also make sure
1088   // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1089   MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1090   return BrCondTarget != MI.getOperand(0).getMBB() &&
1091          MBB->isLayoutSuccessor(BrCondTarget);
1092 }
1093 
1094 void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
1095                                                    MachineInstr *&BrCond) {
1096   MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1097   Builder.setInstrAndDebugLoc(*BrCond);
1098   LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1099   // FIXME: Does int/fp matter for this? If so, we might need to restrict
1100   // this to i1 only since we might not know for sure what kind of
1101   // compare generated the condition value.
1102   auto True = Builder.buildConstant(
1103       Ty, getICmpTrueVal(getTargetLowering(), false, false));
1104   auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1105 
1106   auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1107   Observer.changingInstr(MI);
1108   MI.getOperand(0).setMBB(FallthroughBB);
1109   Observer.changedInstr(MI);
1110 
1111   // Change the conditional branch to use the inverted condition and
1112   // new target block.
1113   Observer.changingInstr(*BrCond);
1114   BrCond->getOperand(0).setReg(Xor.getReg(0));
1115   BrCond->getOperand(1).setMBB(BrTarget);
1116   Observer.changedInstr(*BrCond);
1117 }
1118 
1119 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
1120   // On Darwin, -Os means optimize for size without hurting performance, so
1121   // only really optimize for size when -Oz (MinSize) is used.
1122   if (MF.getTarget().getTargetTriple().isOSDarwin())
1123     return MF.getFunction().hasMinSize();
1124   return MF.getFunction().hasOptSize();
1125 }
1126 
1127 // Returns a list of types to use for memory op lowering in MemOps. A partial
1128 // port of findOptimalMemOpLowering in TargetLowering.
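//
// E.g. an 11-byte memory op might produce MemOps = {s64, s16, s8}, or
// {s64, s32} with the final 4-byte access overlapping the tail of the
// previous one when fast misaligned accesses are allowed.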
1129 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
1130                                           unsigned Limit, const MemOp &Op,
1131                                           unsigned DstAS, unsigned SrcAS,
1132                                           const AttributeList &FuncAttributes,
1133                                           const TargetLowering &TLI) {
1134   if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
1135     return false;
1136 
1137   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
1138 
1139   if (Ty == LLT()) {
1140     // Use the largest scalar type whose alignment constraints are satisfied.
1141     // We only need to check DstAlign here as SrcAlign is always greater or
1142     // equal to DstAlign (or zero).
1143     Ty = LLT::scalar(64);
1144     if (Op.isFixedDstAlign())
1145       while (Op.getDstAlign() < Ty.getSizeInBytes() &&
1146              !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
1147         Ty = LLT::scalar(Ty.getSizeInBytes());
1148     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
1149     // FIXME: check for the largest legal type we can load/store to.
1150   }
1151 
1152   unsigned NumMemOps = 0;
1153   uint64_t Size = Op.size();
1154   while (Size) {
1155     unsigned TySize = Ty.getSizeInBytes();
1156     while (TySize > Size) {
      // For now, only use non-vector loads / stores for the left-over pieces.
1158       LLT NewTy = Ty;
1159       // FIXME: check for mem op safety and legality of the types. Not all of
1160       // SDAGisms map cleanly to GISel concepts.
1161       if (NewTy.isVector())
1162         NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
1163       NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
1164       unsigned NewTySize = NewTy.getSizeInBytes();
1165       assert(NewTySize > 0 && "Could not find appropriate type");
1166 
1167       // If the new LLT cannot cover all of the remaining bits, then consider
1168       // issuing a (or a pair of) unaligned and overlapping load / store.
1169       bool Fast;
      // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
1171       MVT VT = getMVTForLLT(Ty);
1172       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
1173           TLI.allowsMisalignedMemoryAccesses(
1174               VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
1175               MachineMemOperand::MONone, &Fast) &&
1176           Fast)
1177         TySize = Size;
1178       else {
1179         Ty = NewTy;
1180         TySize = NewTySize;
1181       }
1182     }
1183 
1184     if (++NumMemOps > Limit)
1185       return false;
1186 
1187     MemOps.push_back(Ty);
1188     Size -= TySize;
1189   }
1190 
1191   return true;
1192 }
1193 
1194 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
1195   if (Ty.isVector())
1196     return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
1197                                 Ty.getNumElements());
1198   return IntegerType::get(C, Ty.getSizeInBits());
1199 }
1200 
1201 // Get a vectorized representation of the memset value operand, GISel edition.
1202 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
1203   MachineRegisterInfo &MRI = *MIB.getMRI();
1204   unsigned NumBits = Ty.getScalarSizeInBits();
1205   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1206   if (!Ty.isVector() && ValVRegAndVal) {
1207     APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
1208     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
1209     return MIB.buildConstant(Ty, SplatVal).getReg(0);
1210   }
1211 
1212   // Extend the byte value to the larger type, and then multiply by a magic
1213   // value 0x010101... in order to replicate it across every byte.
1214   // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
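  //
  // E.g. for an s32 type and byte value 0xAB:
  //   zext 0xAB to s32        -> 0x000000AB
  //   0x000000AB * 0x01010101 -> 0xABABABAB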
1215   if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
1216     return MIB.buildConstant(Ty, 0).getReg(0);
1217   }
1218 
1219   LLT ExtType = Ty.getScalarType();
1220   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
1221   if (NumBits > 8) {
1222     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
1223     auto MagicMI = MIB.buildConstant(ExtType, Magic);
1224     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
1225   }
1226 
1227   // For vector types create a G_BUILD_VECTOR.
1228   if (Ty.isVector())
1229     Val = MIB.buildSplatVector(Ty, Val).getReg(0);
1230 
1231   return Val;
1232 }
1233 
1234 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
1235                                     Register Val, uint64_t KnownLen,
1236                                     Align Alignment, bool IsVolatile) {
1237   auto &MF = *MI.getParent()->getParent();
1238   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1239   auto &DL = MF.getDataLayout();
1240   LLVMContext &C = MF.getFunction().getContext();
1241 
1242   assert(KnownLen != 0 && "Have a zero length memset length!");
1243 
1244   bool DstAlignCanChange = false;
1245   MachineFrameInfo &MFI = MF.getFrameInfo();
1246   bool OptSize = shouldLowerMemFuncForSize(MF);
1247 
1248   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1249   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1250     DstAlignCanChange = true;
1251 
1252   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
1253   std::vector<LLT> MemOps;
1254 
1255   const auto &DstMMO = **MI.memoperands_begin();
1256   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1257 
1258   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1259   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1260 
1261   if (!findGISelOptimalMemOpLowering(MemOps, Limit,
1262                                      MemOp::Set(KnownLen, DstAlignCanChange,
1263                                                 Alignment,
1264                                                 /*IsZeroMemset=*/IsZeroVal,
1265                                                 /*IsVolatile=*/IsVolatile),
1266                                      DstPtrInfo.getAddrSpace(), ~0u,
1267                                      MF.getFunction().getAttributes(), TLI))
1268     return false;
1269 
1270   if (DstAlignCanChange) {
1271     // Get an estimate of the type from the LLT.
1272     Type *IRTy = getTypeForLLT(MemOps[0], C);
1273     Align NewAlign = DL.getABITypeAlign(IRTy);
1274     if (NewAlign > Alignment) {
1275       Alignment = NewAlign;
1276       unsigned FI = FIDef->getOperand(1).getIndex();
1277       // Give the stack frame object a larger alignment if needed.
1278       if (MFI.getObjectAlign(FI) < Alignment)
1279         MFI.setObjectAlignment(FI, Alignment);
1280     }
1281   }
1282 
1283   MachineIRBuilder MIB(MI);
1284   // Find the largest store and generate the bit pattern for it.
1285   LLT LargestTy = MemOps[0];
1286   for (unsigned i = 1; i < MemOps.size(); i++)
1287     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1288       LargestTy = MemOps[i];
1289 
1290   // The memset stored value is always defined as an s8, so in order to make it
1291   // work with larger store types we need to repeat the bit pattern across the
1292   // wider type.
1293   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
1294 
1295   if (!MemSetValue)
1296     return false;
1297 
1298   // Generate the stores. For each store type in the list, we generate the
1299   // matching store of that type to the destination address.
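  //
  // E.g. for KnownLen = 6 with MemOps = {s32, s16}, this emits an s32 store
  // at offset 0 followed by an s16 store at offset 4.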
1300   LLT PtrTy = MRI.getType(Dst);
1301   unsigned DstOff = 0;
1302   unsigned Size = KnownLen;
1303   for (unsigned I = 0; I < MemOps.size(); I++) {
1304     LLT Ty = MemOps[I];
1305     unsigned TySize = Ty.getSizeInBytes();
1306     if (TySize > Size) {
1307       // Issuing an unaligned load / store pair that overlaps with the previous
1308       // pair. Adjust the offset accordingly.
1309       assert(I == MemOps.size() - 1 && I != 0);
1310       DstOff -= TySize - Size;
1311     }
1312 
    // If this store is smaller than the largest store, see whether we can get
    // the smaller value for free with a truncate.
1315     Register Value = MemSetValue;
1316     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1317       MVT VT = getMVTForLLT(Ty);
1318       MVT LargestVT = getMVTForLLT(LargestTy);
1319       if (!LargestTy.isVector() && !Ty.isVector() &&
1320           TLI.isTruncateFree(LargestVT, VT))
1321         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1322       else
1323         Value = getMemsetValue(Val, Ty, MIB);
1324       if (!Value)
1325         return false;
1326     }
1327 
1328     auto *StoreMMO =
1329         MF.getMachineMemOperand(&DstMMO, DstOff, Ty);
1330 
1331     Register Ptr = Dst;
1332     if (DstOff != 0) {
1333       auto Offset =
1334           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1335       Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1336     }
1337 
1338     MIB.buildStore(Value, Ptr, *StoreMMO);
1339     DstOff += Ty.getSizeInBytes();
1340     Size -= TySize;
1341   }
1342 
1343   MI.eraseFromParent();
1344   return true;
1345 }
1346 
1347 bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI) {
1348   assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
1349 
1350   Register Dst = MI.getOperand(0).getReg();
1351   Register Src = MI.getOperand(1).getReg();
1352   Register Len = MI.getOperand(2).getReg();
1353 
1354   const auto *MMOIt = MI.memoperands_begin();
1355   const MachineMemOperand *MemOp = *MMOIt;
1356   bool IsVolatile = MemOp->isVolatile();
1357 
1358   // See if this is a constant length copy
1359   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1360   // FIXME: support dynamically sized G_MEMCPY_INLINE
1361   assert(LenVRegAndVal.hasValue() &&
1362          "inline memcpy with dynamic size is not yet supported");
1363   uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
1364   if (KnownLen == 0) {
1365     MI.eraseFromParent();
1366     return true;
1367   }
1368 
1369   const auto &DstMMO = **MI.memoperands_begin();
1370   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1371   Align DstAlign = DstMMO.getBaseAlign();
1372   Align SrcAlign = SrcMMO.getBaseAlign();
1373 
1374   return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
1375                              IsVolatile);
1376 }
1377 
1378 bool CombinerHelper::tryEmitMemcpyInline(MachineInstr &MI, Register Dst,
1379                                          Register Src, uint64_t KnownLen,
1380                                          Align DstAlign, Align SrcAlign,
1381                                          bool IsVolatile) {
1382   assert(MI.getOpcode() == TargetOpcode::G_MEMCPY_INLINE);
1383   return optimizeMemcpy(MI, Dst, Src, KnownLen,
1384                         std::numeric_limits<uint64_t>::max(), DstAlign,
1385                         SrcAlign, IsVolatile);
1386 }
1387 
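// Lower a constant-length G_MEMCPY into a sequence of load/store pairs,
// provided the target's preferred lowering needs no more than Limit
// operations.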
1388 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1389                                     Register Src, uint64_t KnownLen,
1390                                     uint64_t Limit, Align DstAlign,
1391                                     Align SrcAlign, bool IsVolatile) {
1392   auto &MF = *MI.getParent()->getParent();
1393   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1394   auto &DL = MF.getDataLayout();
1395   LLVMContext &C = MF.getFunction().getContext();
1396 
1397   assert(KnownLen != 0 && "Have a zero length memcpy length!");
1398 
1399   bool DstAlignCanChange = false;
1400   MachineFrameInfo &MFI = MF.getFrameInfo();
1401   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1402 
1403   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1404   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1405     DstAlignCanChange = true;
1406 
  // FIXME: infer better src pointer alignment like SelectionDAG does here.
  // FIXME: also use the equivalent of isMemSrcFromConstant, and always inline
  // if the memcpy is in a tail call position.
1410 
1411   std::vector<LLT> MemOps;
1412 
1413   const auto &DstMMO = **MI.memoperands_begin();
1414   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1415   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1416   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1417 
1418   if (!findGISelOptimalMemOpLowering(
1419           MemOps, Limit,
1420           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1421                       IsVolatile),
1422           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1423           MF.getFunction().getAttributes(), TLI))
1424     return false;
1425 
1426   if (DstAlignCanChange) {
1427     // Get an estimate of the type from the LLT.
1428     Type *IRTy = getTypeForLLT(MemOps[0], C);
1429     Align NewAlign = DL.getABITypeAlign(IRTy);
1430 
1431     // Don't promote to an alignment that would require dynamic stack
1432     // realignment.
1433     if (!TRI->hasStackRealignment(MF))
1434       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1435         NewAlign = NewAlign / 2;
1436 
1437     if (NewAlign > Alignment) {
1438       Alignment = NewAlign;
1439       unsigned FI = FIDef->getOperand(1).getIndex();
1440       // Give the stack frame object a larger alignment if needed.
1441       if (MFI.getObjectAlign(FI) < Alignment)
1442         MFI.setObjectAlignment(FI, Alignment);
1443     }
1444   }
1445 
1446   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1447 
1448   MachineIRBuilder MIB(MI);
  // Now we need to emit a load/store pair for each of the types we've
  // collected. I.e. for each type, generate a load from the source pointer of
  // that type width, and then generate a corresponding store of the loaded
  // value to the dest buffer. This can result in a sequence of loads and
  // stores of mixed types, depending on what the target specifies as good
  // types to use.
1454   unsigned CurrOffset = 0;
1455   LLT PtrTy = MRI.getType(Src);
1456   unsigned Size = KnownLen;
1457   for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
1460     if (CopyTy.getSizeInBytes() > Size)
1461       CurrOffset -= CopyTy.getSizeInBytes() - Size;
1462 
1463     // Construct MMOs for the accesses.
1464     auto *LoadMMO =
1465         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1466     auto *StoreMMO =
1467         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1468 
1469     // Create the load.
1470     Register LoadPtr = Src;
1471     Register Offset;
1472     if (CurrOffset != 0) {
1473       Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1474                    .getReg(0);
1475       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1476     }
1477     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1478 
1479     // Create the store.
1480     Register StorePtr =
1481         CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1482     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1483     CurrOffset += CopyTy.getSizeInBytes();
1484     Size -= CopyTy.getSizeInBytes();
1485   }
1486 
1487   MI.eraseFromParent();
1488   return true;
1489 }
1490 
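// Lower a constant-length G_MEMMOVE by emitting all of the loads before any of
// the stores, so that overlapping source and destination buffers are handled
// correctly.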
1491 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1492                                      Register Src, uint64_t KnownLen,
1493                                      Align DstAlign, Align SrcAlign,
1494                                      bool IsVolatile) {
1495   auto &MF = *MI.getParent()->getParent();
1496   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1497   auto &DL = MF.getDataLayout();
1498   LLVMContext &C = MF.getFunction().getContext();
1499 
1500   assert(KnownLen != 0 && "Have a zero length memmove length!");
1501 
1502   bool DstAlignCanChange = false;
1503   MachineFrameInfo &MFI = MF.getFrameInfo();
1504   bool OptSize = shouldLowerMemFuncForSize(MF);
1505   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1506 
1507   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1508   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1509     DstAlignCanChange = true;
1510 
1511   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1512   std::vector<LLT> MemOps;
1513 
1514   const auto &DstMMO = **MI.memoperands_begin();
1515   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1516   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1517   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1518 
  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now, do the
  // same thing here.
1522   if (!findGISelOptimalMemOpLowering(
1523           MemOps, Limit,
1524           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1525                       /*IsVolatile*/ true),
1526           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1527           MF.getFunction().getAttributes(), TLI))
1528     return false;
1529 
1530   if (DstAlignCanChange) {
1531     // Get an estimate of the type from the LLT.
1532     Type *IRTy = getTypeForLLT(MemOps[0], C);
1533     Align NewAlign = DL.getABITypeAlign(IRTy);
1534 
1535     // Don't promote to an alignment that would require dynamic stack
1536     // realignment.
1537     if (!TRI->hasStackRealignment(MF))
1538       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1539         NewAlign = NewAlign / 2;
1540 
1541     if (NewAlign > Alignment) {
1542       Alignment = NewAlign;
1543       unsigned FI = FIDef->getOperand(1).getIndex();
1544       // Give the stack frame object a larger alignment if needed.
1545       if (MFI.getObjectAlign(FI) < Alignment)
1546         MFI.setObjectAlignment(FI, Alignment);
1547     }
1548   }
1549 
1550   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1551 
1552   MachineIRBuilder MIB(MI);
  // Memmove requires that we perform all of the loads before issuing any of
  // the stores. Apart from that, this loop is doing much the same thing as
  // the memcpy codegen function.
1556   unsigned CurrOffset = 0;
1557   LLT PtrTy = MRI.getType(Src);
1558   SmallVector<Register, 16> LoadVals;
1559   for (auto CopyTy : MemOps) {
1560     // Construct MMO for the load.
1561     auto *LoadMMO =
1562         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1563 
1564     // Create the load.
1565     Register LoadPtr = Src;
1566     if (CurrOffset != 0) {
1567       auto Offset =
1568           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1569       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1570     }
1571     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1572     CurrOffset += CopyTy.getSizeInBytes();
1573   }
1574 
1575   CurrOffset = 0;
1576   for (unsigned I = 0; I < MemOps.size(); ++I) {
1577     LLT CopyTy = MemOps[I];
1578     // Now store the values loaded.
1579     auto *StoreMMO =
1580         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1581 
1582     Register StorePtr = Dst;
1583     if (CurrOffset != 0) {
1584       auto Offset =
1585           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1586       StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1587     }
1588     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1589     CurrOffset += CopyTy.getSizeInBytes();
1590   }
1591   MI.eraseFromParent();
1592   return true;
1593 }
1594 
1595 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1596   const unsigned Opc = MI.getOpcode();
  // This combine is fairly complex, so it's not written with a separate
  // matcher function.
  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
          Opc == TargetOpcode::G_MEMSET ||
          Opc == TargetOpcode::G_MEMCPY_INLINE) &&
         "Expected a memcpy-like instruction");
1601 
1602   auto MMOIt = MI.memoperands_begin();
1603   const MachineMemOperand *MemOp = *MMOIt;
1604 
1605   Align DstAlign = MemOp->getBaseAlign();
1606   Align SrcAlign;
1607   Register Dst = MI.getOperand(0).getReg();
1608   Register Src = MI.getOperand(1).getReg();
1609   Register Len = MI.getOperand(2).getReg();
1610 
1611   if (Opc != TargetOpcode::G_MEMSET) {
1612     assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1613     MemOp = *(++MMOIt);
1614     SrcAlign = MemOp->getBaseAlign();
1615   }
1616 
  // See if this is a constant-length copy.
1618   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1619   if (!LenVRegAndVal)
1620     return false; // Leave it to the legalizer to lower it to a libcall.
1621   uint64_t KnownLen = LenVRegAndVal->Value.getZExtValue();
1622 
1623   if (KnownLen == 0) {
1624     MI.eraseFromParent();
1625     return true;
1626   }
1627 
1628   bool IsVolatile = MemOp->isVolatile();
1629   if (Opc == TargetOpcode::G_MEMCPY_INLINE)
1630     return tryEmitMemcpyInline(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
1631                                IsVolatile);
1632 
1633   // Don't try to optimize volatile.
1634   if (IsVolatile)
1635     return false;
1636 
1637   if (MaxLen && KnownLen > MaxLen)
1638     return false;
1639 
1640   if (Opc == TargetOpcode::G_MEMCPY) {
1641     auto &MF = *MI.getParent()->getParent();
1642     const auto &TLI = *MF.getSubtarget().getTargetLowering();
1643     bool OptSize = shouldLowerMemFuncForSize(MF);
1644     uint64_t Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1645     return optimizeMemcpy(MI, Dst, Src, KnownLen, Limit, DstAlign, SrcAlign,
1646                           IsVolatile);
1647   }
1648   if (Opc == TargetOpcode::G_MEMMOVE)
    return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
1650   if (Opc == TargetOpcode::G_MEMSET)
1651     return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1652   return false;
1653 }
1654 
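// Constant fold a unary floating-point operation (G_FNEG, G_FABS, G_FPTRUNC,
// G_FSQRT or G_FLOG2) on a constant operand, returning the folded value in the
// semantics of DstTy.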
1655 static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
1656                                              const Register Op,
1657                                              const MachineRegisterInfo &MRI) {
1658   const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
1659   if (!MaybeCst)
1660     return None;
1661 
1662   APFloat V = MaybeCst->getValueAPF();
1663   switch (Opcode) {
1664   default:
1665     llvm_unreachable("Unexpected opcode!");
1666   case TargetOpcode::G_FNEG: {
1667     V.changeSign();
1668     return V;
1669   }
1670   case TargetOpcode::G_FABS: {
1671     V.clearSign();
1672     return V;
1673   }
1674   case TargetOpcode::G_FPTRUNC:
1675     break;
1676   case TargetOpcode::G_FSQRT: {
1677     bool Unused;
1678     V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
1679     V = APFloat(sqrt(V.convertToDouble()));
1680     break;
1681   }
1682   case TargetOpcode::G_FLOG2: {
1683     bool Unused;
1684     V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
1685     V = APFloat(log2(V.convertToDouble()));
1686     break;
1687   }
1688   }
1689   // Convert `APFloat` to appropriate IEEE type depending on `DstTy`. Otherwise,
1690   // `buildFConstant` will assert on size mismatch. Only `G_FPTRUNC`, `G_FSQRT`,
1691   // and `G_FLOG2` reach here.
1692   bool Unused;
1693   V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
1694   return V;
1695 }
1696 
1697 bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
1698                                                      Optional<APFloat> &Cst) {
1699   Register DstReg = MI.getOperand(0).getReg();
1700   Register SrcReg = MI.getOperand(1).getReg();
1701   LLT DstTy = MRI.getType(DstReg);
1702   Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
1703   return Cst.hasValue();
1704 }
1705 
1706 void CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
1707                                                      Optional<APFloat> &Cst) {
1708   assert(Cst.hasValue() && "Optional is unexpectedly empty!");
1709   Builder.setInstrAndDebugLoc(MI);
1710   MachineFunction &MF = Builder.getMF();
1711   auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
1712   Register DstReg = MI.getOperand(0).getReg();
1713   Builder.buildFConstant(DstReg, *FPVal);
1714   MI.eraseFromParent();
1715 }
1716 
1717 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1718                                            PtrAddChain &MatchInfo) {
1719   // We're trying to match the following pattern:
1720   //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1721   //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
1722   // -->
1723   //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1724 
1725   if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1726     return false;
1727 
1728   Register Add2 = MI.getOperand(1).getReg();
1729   Register Imm1 = MI.getOperand(2).getReg();
1730   auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1731   if (!MaybeImmVal)
1732     return false;
1733 
1734   MachineInstr *Add2Def = MRI.getVRegDef(Add2);
1735   if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1736     return false;
1737 
1738   Register Base = Add2Def->getOperand(1).getReg();
1739   Register Imm2 = Add2Def->getOperand(2).getReg();
1740   auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1741   if (!MaybeImm2Val)
1742     return false;
1743 
  // Check if the new combined immediate forms an illegal addressing mode.
  // Do not combine if the addressing mode was legal before but would become
  // illegal. To do so, we need to find a load/store user of the pointer to
  // get the access type.
1748   Type *AccessTy = nullptr;
1749   auto &MF = *MI.getMF();
1750   for (auto &UseMI : MRI.use_nodbg_instructions(MI.getOperand(0).getReg())) {
1751     if (auto *LdSt = dyn_cast<GLoadStore>(&UseMI)) {
1752       AccessTy = getTypeForLLT(MRI.getType(LdSt->getReg(0)),
1753                                MF.getFunction().getContext());
1754       break;
1755     }
1756   }
1757   TargetLoweringBase::AddrMode AMNew;
1758   APInt CombinedImm = MaybeImmVal->Value + MaybeImm2Val->Value;
1759   AMNew.BaseOffs = CombinedImm.getSExtValue();
1760   if (AccessTy) {
1761     AMNew.HasBaseReg = true;
1762     TargetLoweringBase::AddrMode AMOld;
1763     AMOld.BaseOffs = MaybeImm2Val->Value.getSExtValue();
1764     AMOld.HasBaseReg = true;
1765     unsigned AS = MRI.getType(Add2).getAddressSpace();
1766     const auto &TLI = *MF.getSubtarget().getTargetLowering();
1767     if (TLI.isLegalAddressingMode(MF.getDataLayout(), AMOld, AccessTy, AS) &&
1768         !TLI.isLegalAddressingMode(MF.getDataLayout(), AMNew, AccessTy, AS))
1769       return false;
1770   }
1771 
1772   // Pass the combined immediate to the apply function.
1773   MatchInfo.Imm = AMNew.BaseOffs;
1774   MatchInfo.Base = Base;
1775   MatchInfo.Bank = getRegBank(Imm2);
1776   return true;
1777 }
1778 
1779 void CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1780                                            PtrAddChain &MatchInfo) {
1781   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1782   MachineIRBuilder MIB(MI);
1783   LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1784   auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1785   setRegBank(NewOffset.getReg(0), MatchInfo.Bank);
1786   Observer.changingInstr(MI);
1787   MI.getOperand(1).setReg(MatchInfo.Base);
1788   MI.getOperand(2).setReg(NewOffset.getReg(0));
1789   Observer.changedInstr(MI);
1790 }
1791 
1792 bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1793                                           RegisterImmPair &MatchInfo) {
1794   // We're trying to match the following pattern with any of
1795   // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1796   //   %t1 = SHIFT %base, G_CONSTANT imm1
1797   //   %root = SHIFT %t1, G_CONSTANT imm2
1798   // -->
1799   //   %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1800 
1801   unsigned Opcode = MI.getOpcode();
1802   assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1803           Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1804           Opcode == TargetOpcode::G_USHLSAT) &&
1805          "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1806 
1807   Register Shl2 = MI.getOperand(1).getReg();
1808   Register Imm1 = MI.getOperand(2).getReg();
1809   auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1810   if (!MaybeImmVal)
1811     return false;
1812 
1813   MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1814   if (Shl2Def->getOpcode() != Opcode)
1815     return false;
1816 
1817   Register Base = Shl2Def->getOperand(1).getReg();
1818   Register Imm2 = Shl2Def->getOperand(2).getReg();
1819   auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1820   if (!MaybeImm2Val)
1821     return false;
1822 
1823   // Pass the combined immediate to the apply function.
1824   MatchInfo.Imm =
1825       (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
1826   MatchInfo.Reg = Base;
1827 
1828   // There is no simple replacement for a saturating unsigned left shift that
1829   // exceeds the scalar size.
1830   if (Opcode == TargetOpcode::G_USHLSAT &&
1831       MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1832     return false;
1833 
1834   return true;
1835 }
1836 
1837 void CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1838                                           RegisterImmPair &MatchInfo) {
1839   unsigned Opcode = MI.getOpcode();
1840   assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1841           Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1842           Opcode == TargetOpcode::G_USHLSAT) &&
1843          "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1844 
1845   Builder.setInstrAndDebugLoc(MI);
1846   LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1847   unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1848   auto Imm = MatchInfo.Imm;
1849 
1850   if (Imm >= ScalarSizeInBits) {
1851     // Any logical shift that exceeds scalar size will produce zero.
1852     if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1853       Builder.buildConstant(MI.getOperand(0), 0);
1854       MI.eraseFromParent();
1855       return;
1856     }
1857     // Arithmetic shift and saturating signed left shift have no effect beyond
1858     // scalar size.
1859     Imm = ScalarSizeInBits - 1;
1860   }
1861 
1862   LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1863   Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1864   Observer.changingInstr(MI);
1865   MI.getOperand(1).setReg(MatchInfo.Reg);
1866   MI.getOperand(2).setReg(NewImm);
1867   Observer.changedInstr(MI);
1868 }
1869 
1870 bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1871                                               ShiftOfShiftedLogic &MatchInfo) {
1872   // We're trying to match the following pattern with any of
1873   // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1874   // with any of G_AND/G_OR/G_XOR logic instructions.
1875   //   %t1 = SHIFT %X, G_CONSTANT C0
1876   //   %t2 = LOGIC %t1, %Y
1877   //   %root = SHIFT %t2, G_CONSTANT C1
1878   // -->
1879   //   %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1880   //   %t4 = SHIFT %Y, G_CONSTANT C1
1881   //   %root = LOGIC %t3, %t4
1882   unsigned ShiftOpcode = MI.getOpcode();
1883   assert((ShiftOpcode == TargetOpcode::G_SHL ||
1884           ShiftOpcode == TargetOpcode::G_ASHR ||
1885           ShiftOpcode == TargetOpcode::G_LSHR ||
1886           ShiftOpcode == TargetOpcode::G_USHLSAT ||
1887           ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT or G_SSHLSAT");
1889 
1890   // Match a one-use bitwise logic op.
1891   Register LogicDest = MI.getOperand(1).getReg();
1892   if (!MRI.hasOneNonDBGUse(LogicDest))
1893     return false;
1894 
1895   MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1896   unsigned LogicOpcode = LogicMI->getOpcode();
1897   if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1898       LogicOpcode != TargetOpcode::G_XOR)
1899     return false;
1900 
1901   // Find a matching one-use shift by constant.
1902   const Register C1 = MI.getOperand(2).getReg();
1903   auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
1904   if (!MaybeImmVal)
1905     return false;
1906 
1907   const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1908 
1909   auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
    // The shift should match the previous one's opcode and have a single use.
1911     if (MI->getOpcode() != ShiftOpcode ||
1912         !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1913       return false;
1914 
1915     // Must be a constant.
1916     auto MaybeImmVal =
1917         getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1918     if (!MaybeImmVal)
1919       return false;
1920 
1921     ShiftVal = MaybeImmVal->Value.getSExtValue();
1922     return true;
1923   };
1924 
1925   // Logic ops are commutative, so check each operand for a match.
1926   Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1927   MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1928   Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1929   MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1930   uint64_t C0Val;
1931 
1932   if (matchFirstShift(LogicMIOp1, C0Val)) {
1933     MatchInfo.LogicNonShiftReg = LogicMIReg2;
1934     MatchInfo.Shift2 = LogicMIOp1;
1935   } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1936     MatchInfo.LogicNonShiftReg = LogicMIReg1;
1937     MatchInfo.Shift2 = LogicMIOp2;
1938   } else
1939     return false;
1940 
1941   MatchInfo.ValSum = C0Val + C1Val;
1942 
1943   // The fold is not valid if the sum of the shift values exceeds bitwidth.
1944   if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1945     return false;
1946 
1947   MatchInfo.Logic = LogicMI;
1948   return true;
1949 }
1950 
1951 void CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1952                                               ShiftOfShiftedLogic &MatchInfo) {
1953   unsigned Opcode = MI.getOpcode();
1954   assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1955           Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1956           Opcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT or G_SSHLSAT");
1958 
1959   LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1960   LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1961   Builder.setInstrAndDebugLoc(MI);
1962 
1963   Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1964 
1965   Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1966   Register Shift1 =
1967       Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1968 
1969   Register Shift2Const = MI.getOperand(2).getReg();
1970   Register Shift2 = Builder
1971                         .buildInstr(Opcode, {DestType},
1972                                     {MatchInfo.LogicNonShiftReg, Shift2Const})
1973                         .getReg(0);
1974 
1975   Register Dest = MI.getOperand(0).getReg();
1976   Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1977 
  // These each had a single use, so it's safe to remove them.
1979   MatchInfo.Shift2->eraseFromParentAndMarkDBGValuesForRemoval();
1980   MatchInfo.Logic->eraseFromParentAndMarkDBGValuesForRemoval();
1981 
1982   MI.eraseFromParent();
1983 }
1984 
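// Fold a multiplication by a power-of-two constant into a left shift:
//   %root = G_MUL %x, G_CONSTANT 16
// -->
//   %root = G_SHL %x, G_CONSTANT 4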
1985 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1986                                           unsigned &ShiftVal) {
1987   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1988   auto MaybeImmVal =
1989       getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1990   if (!MaybeImmVal)
1991     return false;
1992 
1993   ShiftVal = MaybeImmVal->Value.exactLogBase2();
1994   return (static_cast<int32_t>(ShiftVal) != -1);
1995 }
1996 
1997 void CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1998                                           unsigned &ShiftVal) {
1999   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2000   MachineIRBuilder MIB(MI);
2001   LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
2002   auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
2003   Observer.changingInstr(MI);
2004   MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
2005   MI.getOperand(2).setReg(ShiftCst.getReg(0));
2006   Observer.changedInstr(MI);
2007 }
2008 
2009 // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
2010 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
2011                                              RegisterImmPair &MatchData) {
2012   assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
2013 
2014   Register LHS = MI.getOperand(1).getReg();
2015 
2016   Register ExtSrc;
2017   if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
2018       !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
2019       !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
2020     return false;
2021 
2022   // TODO: Should handle vector splat.
2023   Register RHS = MI.getOperand(2).getReg();
2024   auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
2025   if (!MaybeShiftAmtVal)
2026     return false;
2027 
2028   if (LI) {
2029     LLT SrcTy = MRI.getType(ExtSrc);
2030 
    // We only really care about the legality of the shifted value. We can
    // pick any type for the constant shift amount, so ask the target what to
    // use. Otherwise we would have to guess and hope it is reported as legal.
2034     LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
2035     if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
2036       return false;
2037   }
2038 
2039   int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
2040   MatchData.Reg = ExtSrc;
2041   MatchData.Imm = ShiftAmt;
2042 
2043   unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
2044   return MinLeadingZeros >= ShiftAmt;
2045 }
2046 
2047 void CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
2048                                              const RegisterImmPair &MatchData) {
2049   Register ExtSrcReg = MatchData.Reg;
2050   int64_t ShiftAmtVal = MatchData.Imm;
2051 
2052   LLT ExtSrcTy = MRI.getType(ExtSrcReg);
2053   Builder.setInstrAndDebugLoc(MI);
2054   auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
2055   auto NarrowShift =
2056       Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
2057   Builder.buildZExt(MI.getOperand(0), NarrowShift);
2058   MI.eraseFromParent();
2059 }
2060 
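// Match a G_MERGE_VALUES whose sources are, in order, exactly the results of a
// single G_UNMERGE_VALUES:
//   %a:_(s32), %b:_(s32) = G_UNMERGE_VALUES %x:_(s64)
//   %root:_(s64) = G_MERGE_VALUES %a, %b
// The merge can then be replaced by the unmerge's source register %x.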
2061 bool CombinerHelper::matchCombineMergeUnmerge(MachineInstr &MI,
2062                                               Register &MatchInfo) {
2063   GMerge &Merge = cast<GMerge>(MI);
2064   SmallVector<Register, 16> MergedValues;
2065   for (unsigned I = 0; I < Merge.getNumSources(); ++I)
2066     MergedValues.emplace_back(Merge.getSourceReg(I));
2067 
2068   auto *Unmerge = getOpcodeDef<GUnmerge>(MergedValues[0], MRI);
2069   if (!Unmerge || Unmerge->getNumDefs() != Merge.getNumSources())
2070     return false;
2071 
2072   for (unsigned I = 0; I < MergedValues.size(); ++I)
2073     if (MergedValues[I] != Unmerge->getReg(I))
2074       return false;
2075 
2076   MatchInfo = Unmerge->getSourceReg();
2077   return true;
2078 }
2079 
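// Look through a chain of G_BITCASTs and return the underlying register.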
2080 static Register peekThroughBitcast(Register Reg,
2081                                    const MachineRegisterInfo &MRI) {
2082   while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
2083     ;
2084 
2085   return Reg;
2086 }
2087 
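// Match a G_UNMERGE_VALUES whose source (looking through bitcasts) is a
// merge-like instruction whose source elements have the same type or bit width
// as the unmerge results, collecting the merge's source registers so each
// unmerge result can be rewritten as (a cast of) the corresponding operand.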
2088 bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
2089     MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2090   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2091          "Expected an unmerge");
2092   auto &Unmerge = cast<GUnmerge>(MI);
2093   Register SrcReg = peekThroughBitcast(Unmerge.getSourceReg(), MRI);
2094 
2095   auto *SrcInstr = getOpcodeDef<GMergeLikeOp>(SrcReg, MRI);
2096   if (!SrcInstr)
2097     return false;
2098 
2099   // Check the source type of the merge.
2100   LLT SrcMergeTy = MRI.getType(SrcInstr->getSourceReg(0));
2101   LLT Dst0Ty = MRI.getType(Unmerge.getReg(0));
2102   bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
2103   if (SrcMergeTy != Dst0Ty && !SameSize)
2104     return false;
2105   // They are the same now (modulo a bitcast).
2106   // We can collect all the src registers.
2107   for (unsigned Idx = 0; Idx < SrcInstr->getNumSources(); ++Idx)
2108     Operands.push_back(SrcInstr->getSourceReg(Idx));
2109   return true;
2110 }
2111 
2112 void CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2113     MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2114   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2115          "Expected an unmerge");
2116   assert((MI.getNumOperands() - 1 == Operands.size()) &&
2117          "Not enough operands to replace all defs");
2118   unsigned NumElems = MI.getNumOperands() - 1;
2119 
2120   LLT SrcTy = MRI.getType(Operands[0]);
2121   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2122   bool CanReuseInputDirectly = DstTy == SrcTy;
2123   Builder.setInstrAndDebugLoc(MI);
2124   for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2125     Register DstReg = MI.getOperand(Idx).getReg();
2126     Register SrcReg = Operands[Idx];
2127     if (CanReuseInputDirectly)
2128       replaceRegWith(MRI, DstReg, SrcReg);
2129     else
2130       Builder.buildCast(DstReg, SrcReg);
2131   }
2132   MI.eraseFromParent();
2133 }
2134 
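// Match a G_UNMERGE_VALUES of a G_CONSTANT or G_FCONSTANT, splitting the
// constant into one value per destination, starting with the low bits.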
2135 bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2136                                                  SmallVectorImpl<APInt> &Csts) {
2137   unsigned SrcIdx = MI.getNumOperands() - 1;
2138   Register SrcReg = MI.getOperand(SrcIdx).getReg();
2139   MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2140   if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2141       SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2142     return false;
  // Break down the big constant into smaller ones.
2144   const MachineOperand &CstVal = SrcInstr->getOperand(1);
2145   APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2146                   ? CstVal.getCImm()->getValue()
2147                   : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2148 
2149   LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2150   unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2151   // Unmerge a constant.
2152   for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2153     Csts.emplace_back(Val.trunc(ShiftAmt));
2154     Val = Val.lshr(ShiftAmt);
2155   }
2156 
2157   return true;
2158 }
2159 
2160 void CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2161                                                  SmallVectorImpl<APInt> &Csts) {
2162   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2163          "Expected an unmerge");
2164   assert((MI.getNumOperands() - 1 == Csts.size()) &&
2165          "Not enough operands to replace all defs");
2166   unsigned NumElems = MI.getNumOperands() - 1;
2167   Builder.setInstrAndDebugLoc(MI);
2168   for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2169     Register DstReg = MI.getOperand(Idx).getReg();
2170     Builder.buildConstant(DstReg, Csts[Idx]);
2171   }
2172 
2173   MI.eraseFromParent();
2174 }
2175 
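// Match a G_UNMERGE_VALUES where every result except the first is dead; such
// an unmerge can be rewritten as a truncate of its source.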
2176 bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2177   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2178          "Expected an unmerge");
2179   // Check that all the lanes are dead except the first one.
2180   for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2181     if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2182       return false;
2183   }
2184   return true;
2185 }
2186 
2187 void CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2188   Builder.setInstrAndDebugLoc(MI);
2189   Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
  // Truncating a vector is going to truncate every single lane,
  // whereas we want the full low bits of the source.
  // Do the operation on a scalar instead.
2193   LLT SrcTy = MRI.getType(SrcReg);
2194   if (SrcTy.isVector())
2195     SrcReg =
2196         Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
2197 
2198   Register Dst0Reg = MI.getOperand(0).getReg();
2199   LLT Dst0Ty = MRI.getType(Dst0Reg);
2200   if (Dst0Ty.isVector()) {
2201     auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
2202     Builder.buildCast(Dst0Reg, MIB);
2203   } else
2204     Builder.buildTrunc(Dst0Reg, SrcReg);
2205   MI.eraseFromParent();
2206 }
2207 
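// Match a scalar G_UNMERGE_VALUES of a G_ZEXT whose first destination is wide
// enough to hold the whole zext source. The first result then becomes a zext
// (or a direct replacement) of that source and the remaining results become
// zero.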
2208 bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
2209   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2210          "Expected an unmerge");
2211   Register Dst0Reg = MI.getOperand(0).getReg();
2212   LLT Dst0Ty = MRI.getType(Dst0Reg);
  // G_ZEXT on a vector applies to each lane, so it will
  // affect all destinations. Therefore we won't be able
  // to simplify the unmerge to just the first definition.
2216   if (Dst0Ty.isVector())
2217     return false;
2218   Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2219   LLT SrcTy = MRI.getType(SrcReg);
2220   if (SrcTy.isVector())
2221     return false;
2222 
2223   Register ZExtSrcReg;
2224   if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2225     return false;
2226 
  // Finally we can replace the first definition with
  // a zext of the source if the definition is big enough to hold
  // all of ZExtSrc's bits.
2230   LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2231   return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2232 }
2233 
2234 void CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
2235   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2236          "Expected an unmerge");
2237 
2238   Register Dst0Reg = MI.getOperand(0).getReg();
2239 
2240   MachineInstr *ZExtInstr =
2241       MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2242   assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2243          "Expecting a G_ZEXT");
2244 
2245   Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2246   LLT Dst0Ty = MRI.getType(Dst0Reg);
2247   LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2248 
2249   Builder.setInstrAndDebugLoc(MI);
2250 
2251   if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2252     Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2253   } else {
2254     assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2255            "ZExt src doesn't fit in destination");
2256     replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2257   }
2258 
2259   Register ZeroReg;
2260   for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2261     if (!ZeroReg)
2262       ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2263     replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2264   }
2265   MI.eraseFromParent();
2266 }
2267 
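// Match a wide scalar shift by a constant of at least half the bit width, so
// the shift can be narrowed using a G_UNMERGE_VALUES / G_MERGE_VALUES of
// half-sized values.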
2268 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2269                                                 unsigned TargetShiftSize,
2270                                                 unsigned &ShiftVal) {
2271   assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2272           MI.getOpcode() == TargetOpcode::G_LSHR ||
2273           MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2274 
2275   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO: Handle vector types.
2277     return false;
2278 
2279   // Don't narrow further than the requested size.
2280   unsigned Size = Ty.getSizeInBits();
2281   if (Size <= TargetShiftSize)
2282     return false;
2283 
2284   auto MaybeImmVal =
2285     getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2286   if (!MaybeImmVal)
2287     return false;
2288 
2289   ShiftVal = MaybeImmVal->Value.getSExtValue();
2290   return ShiftVal >= Size / 2 && ShiftVal < Size;
2291 }
2292 
2293 void CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2294                                                 const unsigned &ShiftVal) {
2295   Register DstReg = MI.getOperand(0).getReg();
2296   Register SrcReg = MI.getOperand(1).getReg();
2297   LLT Ty = MRI.getType(SrcReg);
2298   unsigned Size = Ty.getSizeInBits();
2299   unsigned HalfSize = Size / 2;
2300   assert(ShiftVal >= HalfSize);
2301 
2302   LLT HalfTy = LLT::scalar(HalfSize);
2303 
2304   Builder.setInstr(MI);
2305   auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2306   unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2307 
2308   if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2309     Register Narrowed = Unmerge.getReg(1);
2310 
2311     //  dst = G_LSHR s64:x, C for C >= 32
2312     // =>
2313     //   lo, hi = G_UNMERGE_VALUES x
2314     //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2315 
2316     if (NarrowShiftAmt != 0) {
2317       Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2318         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2319     }
2320 
2321     auto Zero = Builder.buildConstant(HalfTy, 0);
2322     Builder.buildMerge(DstReg, { Narrowed, Zero });
2323   } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2324     Register Narrowed = Unmerge.getReg(0);
2325     //  dst = G_SHL s64:x, C for C >= 32
2326     // =>
2327     //   lo, hi = G_UNMERGE_VALUES x
2328     //   dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2329     if (NarrowShiftAmt != 0) {
2330       Narrowed = Builder.buildShl(HalfTy, Narrowed,
2331         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2332     }
2333 
2334     auto Zero = Builder.buildConstant(HalfTy, 0);
2335     Builder.buildMerge(DstReg, { Zero, Narrowed });
2336   } else {
2337     assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2338     auto Hi = Builder.buildAShr(
2339       HalfTy, Unmerge.getReg(1),
2340       Builder.buildConstant(HalfTy, HalfSize - 1));
2341 
2342     if (ShiftVal == HalfSize) {
2343       // (G_ASHR i64:x, 32) ->
2344       //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2345       Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
2346     } else if (ShiftVal == Size - 1) {
2347       // Don't need a second shift.
2348       // (G_ASHR i64:x, 63) ->
2349       //   %narrowed = (G_ASHR hi_32(x), 31)
2350       //   G_MERGE_VALUES %narrowed, %narrowed
2351       Builder.buildMerge(DstReg, { Hi, Hi });
2352     } else {
2353       auto Lo = Builder.buildAShr(
2354         HalfTy, Unmerge.getReg(1),
2355         Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2356 
      // (G_ASHR i64:x, C) -> (for C >= 32)
2358       //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2359       Builder.buildMerge(DstReg, { Lo, Hi });
2360     }
2361   }
2362 
2363   MI.eraseFromParent();
2364 }
2365 
2366 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2367                                               unsigned TargetShiftAmount) {
2368   unsigned ShiftAmt;
2369   if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2370     applyCombineShiftToUnmerge(MI, ShiftAmt);
2371     return true;
2372   }
2373 
2374   return false;
2375 }
2376 
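// Fold G_INTTOPTR (G_PTRTOINT %x) when %x already has the destination type:
//   %i = G_PTRTOINT %x
//   %root = G_INTTOPTR %i
// The G_INTTOPTR can then be replaced by a copy of %x.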
2377 bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2378   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2379   Register DstReg = MI.getOperand(0).getReg();
2380   LLT DstTy = MRI.getType(DstReg);
2381   Register SrcReg = MI.getOperand(1).getReg();
2382   return mi_match(SrcReg, MRI,
2383                   m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2384 }
2385 
2386 void CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2387   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2388   Register DstReg = MI.getOperand(0).getReg();
2389   Builder.setInstr(MI);
2390   Builder.buildCopy(DstReg, Reg);
2391   MI.eraseFromParent();
2392 }
2393 
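// Fold G_PTRTOINT (G_INTTOPTR %x); the result is rewritten as a zext or trunc
// of %x to the destination width.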
2394 bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2395   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2396   Register SrcReg = MI.getOperand(1).getReg();
2397   return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
2398 }
2399 
2400 void CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2401   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2402   Register DstReg = MI.getOperand(0).getReg();
2403   Builder.setInstr(MI);
2404   Builder.buildZExtOrTrunc(DstReg, Reg);
2405   MI.eraseFromParent();
2406 }
2407 
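// Match a G_ADD where one operand is a G_PTRTOINT of a same-width pointer, so
// the add can be rewritten as a G_PTR_ADD followed by a G_PTRTOINT. The bool
// in the returned pair records whether the operands must be commuted to put
// the pointer on the LHS.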
2408 bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2409     MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2410   assert(MI.getOpcode() == TargetOpcode::G_ADD);
2411   Register LHS = MI.getOperand(1).getReg();
2412   Register RHS = MI.getOperand(2).getReg();
2413   LLT IntTy = MRI.getType(LHS);
2414 
2415   // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2416   // instruction.
2417   PtrReg.second = false;
2418   for (Register SrcReg : {LHS, RHS}) {
2419     if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2420       // Don't handle cases where the integer is implicitly converted to the
2421       // pointer width.
2422       LLT PtrTy = MRI.getType(PtrReg.first);
2423       if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2424         return true;
2425     }
2426 
2427     PtrReg.second = true;
2428   }
2429 
2430   return false;
2431 }
2432 
2433 void CombinerHelper::applyCombineAddP2IToPtrAdd(
2434     MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2435   Register Dst = MI.getOperand(0).getReg();
2436   Register LHS = MI.getOperand(1).getReg();
2437   Register RHS = MI.getOperand(2).getReg();
2438 
2439   const bool DoCommute = PtrReg.second;
2440   if (DoCommute)
2441     std::swap(LHS, RHS);
2442   LHS = PtrReg.first;
2443 
2444   LLT PtrTy = MRI.getType(LHS);
2445 
2446   Builder.setInstrAndDebugLoc(MI);
2447   auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2448   Builder.buildPtrToInt(Dst, PtrAdd);
2449   MI.eraseFromParent();
2450 }
2451 
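// Match a G_PTR_ADD whose base is a G_INTTOPTR of a constant and whose offset
// is also a constant; the fold materializes the combined constant C1 + C2.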
2452 bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2453                                                   int64_t &NewCst) {
2454   auto &PtrAdd = cast<GPtrAdd>(MI);
2455   Register LHS = PtrAdd.getBaseReg();
2456   Register RHS = PtrAdd.getOffsetReg();
2457   MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2458 
2459   if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
2460     int64_t Cst;
2461     if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2462       NewCst = Cst + *RHSCst;
2463       return true;
2464     }
2465   }
2466 
2467   return false;
2468 }
2469 
2470 void CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2471                                                   int64_t &NewCst) {
2472   auto &PtrAdd = cast<GPtrAdd>(MI);
2473   Register Dst = PtrAdd.getReg(0);
2474 
2475   Builder.setInstrAndDebugLoc(MI);
2476   Builder.buildConstant(Dst, NewCst);
2477   PtrAdd.eraseFromParent();
2478 }
2479 
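// Fold G_ANYEXT (G_TRUNC %x) to %x when %x already has the destination type.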
2480 bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2481   assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2482   Register DstReg = MI.getOperand(0).getReg();
2483   Register SrcReg = MI.getOperand(1).getReg();
2484   LLT DstTy = MRI.getType(DstReg);
2485   return mi_match(SrcReg, MRI,
2486                   m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2487 }
2488 
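// Fold G_ZEXT (G_TRUNC %x) to %x when %x already has the destination type and
// the bits discarded by the truncate are known to be zero.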
2489 bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2490   assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2491   Register DstReg = MI.getOperand(0).getReg();
2492   Register SrcReg = MI.getOperand(1).getReg();
2493   LLT DstTy = MRI.getType(DstReg);
2494   if (mi_match(SrcReg, MRI,
2495                m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2496     unsigned DstSize = DstTy.getScalarSizeInBits();
2497     unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2498     return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2499   }
2500   return false;
2501 }
2502 
2503 bool CombinerHelper::matchCombineExtOfExt(
2504     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2505   assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2506           MI.getOpcode() == TargetOpcode::G_SEXT ||
2507           MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2508          "Expected a G_[ASZ]EXT");
2509   Register SrcReg = MI.getOperand(1).getReg();
2510   MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2511   // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2512   unsigned Opc = MI.getOpcode();
2513   unsigned SrcOpc = SrcMI->getOpcode();
2514   if (Opc == SrcOpc ||
2515       (Opc == TargetOpcode::G_ANYEXT &&
2516        (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2517       (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2518     MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2519     return true;
2520   }
2521   return false;
2522 }
2523 
2524 void CombinerHelper::applyCombineExtOfExt(
2525     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2526   assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2527           MI.getOpcode() == TargetOpcode::G_SEXT ||
2528           MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2529          "Expected a G_[ASZ]EXT");
2530 
2531   Register Reg = std::get<0>(MatchInfo);
2532   unsigned SrcExtOp = std::get<1>(MatchInfo);
2533 
2534   // Combine exts with the same opcode.
2535   if (MI.getOpcode() == SrcExtOp) {
2536     Observer.changingInstr(MI);
2537     MI.getOperand(1).setReg(Reg);
2538     Observer.changedInstr(MI);
2539     return;
2540   }
2541 
2542   // Combine:
2543   // - anyext([sz]ext x) to [sz]ext x
2544   // - sext(zext x) to zext x
2545   if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2546       (MI.getOpcode() == TargetOpcode::G_SEXT &&
2547        SrcExtOp == TargetOpcode::G_ZEXT)) {
2548     Register DstReg = MI.getOperand(0).getReg();
2549     Builder.setInstrAndDebugLoc(MI);
2550     Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2551     MI.eraseFromParent();
2552   }
2553 }
2554 
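// Rewrite a multiplication by -1 as a subtraction from zero:
//   %root = G_MUL %x, G_CONSTANT -1
// -->
//   %root = G_SUB G_CONSTANT 0, %x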
2555 void CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
2556   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2557   Register DstReg = MI.getOperand(0).getReg();
2558   Register SrcReg = MI.getOperand(1).getReg();
2559   LLT DstTy = MRI.getType(DstReg);
2560 
2561   Builder.setInstrAndDebugLoc(MI);
2562   Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
2563                    MI.getFlags());
2564   MI.eraseFromParent();
2565 }
2566 
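// Fold G_FNEG (G_FNEG %x) to %x.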
2567 bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
2568   assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
2569   Register SrcReg = MI.getOperand(1).getReg();
2570   return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
2571 }
2572 
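// Fold G_FABS (G_FABS %x) by reusing the inner G_FABS result.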
2573 bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
2574   assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
2575   Src = MI.getOperand(1).getReg();
2576   Register AbsSrc;
2577   return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
2578 }
2579 
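// Match G_TRUNC (G_[ASZ]EXT %x), recording %x and the extension opcode so the
// pair can be folded to %x itself, a narrower extension of %x, or a truncate
// of %x, depending on the relative sizes.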
2580 bool CombinerHelper::matchCombineTruncOfExt(
2581     MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2582   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2583   Register SrcReg = MI.getOperand(1).getReg();
2584   MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2585   unsigned SrcOpc = SrcMI->getOpcode();
2586   if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2587       SrcOpc == TargetOpcode::G_ZEXT) {
2588     MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2589     return true;
2590   }
2591   return false;
2592 }
2593 
2594 void CombinerHelper::applyCombineTruncOfExt(
2595     MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2596   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2597   Register SrcReg = MatchInfo.first;
2598   unsigned SrcExtOp = MatchInfo.second;
2599   Register DstReg = MI.getOperand(0).getReg();
2600   LLT SrcTy = MRI.getType(SrcReg);
2601   LLT DstTy = MRI.getType(DstReg);
2602   if (SrcTy == DstTy) {
2603     MI.eraseFromParent();
2604     replaceRegWith(MRI, DstReg, SrcReg);
2605     return;
2606   }
2607   Builder.setInstrAndDebugLoc(MI);
2608   if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2609     Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2610   else
2611     Builder.buildTrunc(DstReg, SrcReg);
2612   MI.eraseFromParent();
2613 }
2614 
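// Match G_TRUNC (G_SHL %x, %amt) where the shift has a single use, the
// narrower shift is legal, and the shift amount is known to be in range for
// the narrow type, so the truncate can be pushed through the shift:
//   %root = G_TRUNC (G_SHL %x, %amt)
// -->
//   %root = G_SHL (G_TRUNC %x), %amt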
2615 bool CombinerHelper::matchCombineTruncOfShl(
2616     MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2617   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2618   Register DstReg = MI.getOperand(0).getReg();
2619   Register SrcReg = MI.getOperand(1).getReg();
2620   LLT DstTy = MRI.getType(DstReg);
2621   Register ShiftSrc;
2622   Register ShiftAmt;
2623 
2624   if (MRI.hasOneNonDBGUse(SrcReg) &&
2625       mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
2626       isLegalOrBeforeLegalizer(
2627           {TargetOpcode::G_SHL,
2628            {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
2629     KnownBits Known = KB->getKnownBits(ShiftAmt);
2630     unsigned Size = DstTy.getSizeInBits();
2631     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
2632       MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
2633       return true;
2634     }
2635   }
2636   return false;
2637 }
2638 
2639 void CombinerHelper::applyCombineTruncOfShl(
2640     MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2641   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2642   Register DstReg = MI.getOperand(0).getReg();
2643   Register SrcReg = MI.getOperand(1).getReg();
2644   LLT DstTy = MRI.getType(DstReg);
2645   MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2646 
2647   Register ShiftSrc = MatchInfo.first;
2648   Register ShiftAmt = MatchInfo.second;
2649   Builder.setInstrAndDebugLoc(MI);
2650   auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
2651   Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
2652   MI.eraseFromParent();
2653 }
2654 
2655 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2656   return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2657     return MO.isReg() &&
2658            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2659   });
2660 }
2661 
2662 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2663   return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2664     return !MO.isReg() ||
2665            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2666   });
2667 }
2668 
2669 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2670   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2671   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2672   return all_of(Mask, [](int Elt) { return Elt < 0; });
2673 }
2674 
2675 bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2676   assert(MI.getOpcode() == TargetOpcode::G_STORE);
2677   return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2678                       MRI);
2679 }
2680 
2681 bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2682   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2683   return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2684                       MRI);
2685 }
2686 
2687 bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2688   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2689   if (auto MaybeCstCmp =
2690           getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
2691     OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
2692     return true;
2693   }
2694   return false;
2695 }
2696 
2697 bool CombinerHelper::eraseInst(MachineInstr &MI) {
2698   MI.eraseFromParent();
2699   return true;
2700 }
2701 
2702 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2703                                     const MachineOperand &MOP2) {
2704   if (!MOP1.isReg() || !MOP2.isReg())
2705     return false;
2706   auto InstAndDef1 = getDefSrcRegIgnoringCopies(MOP1.getReg(), MRI);
2707   if (!InstAndDef1)
2708     return false;
2709   auto InstAndDef2 = getDefSrcRegIgnoringCopies(MOP2.getReg(), MRI);
2710   if (!InstAndDef2)
2711     return false;
2712   MachineInstr *I1 = InstAndDef1->MI;
2713   MachineInstr *I2 = InstAndDef2->MI;
2714 
2715   // Handle a case like this:
2716   //
2717   // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2718   //
2719   // Even though %0 and %1 are produced by the same instruction they are not
2720   // the same values.
2721   if (I1 == I2)
2722     return MOP1.getReg() == MOP2.getReg();
2723 
2724   // If we have an instruction which loads or stores, we can't guarantee that
2725   // it is identical.
2726   //
2727   // For example, we may have
2728   //
2729   // %x1 = G_LOAD %addr (load N from @somewhere)
2730   // ...
2731   // call @foo
2732   // ...
2733   // %x2 = G_LOAD %addr (load N from @somewhere)
2734   // ...
2735   // %or = G_OR %x1, %x2
2736   //
2737   // It's possible that @foo will modify whatever lives at the address we're
2738   // loading from. To be safe, let's just assume that all loads and stores
2739   // are different (unless we have something which is guaranteed to not
2740   // change.)
2741   if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
2742     return false;
2743 
2744   // Check for physical registers on the instructions first to avoid cases
2745   // like this:
2746   //
2747   // %a = COPY $physreg
2748   // ...
2749   // SOMETHING implicit-def $physreg
2750   // ...
2751   // %b = COPY $physreg
2752   //
2753   // These copies are not equivalent.
2754   if (any_of(I1->uses(), [](const MachineOperand &MO) {
2755         return MO.isReg() && MO.getReg().isPhysical();
2756       })) {
2757     // Check if we have a case like this:
2758     //
2759     // %a = COPY $physreg
2760     // %b = COPY %a
2761     //
2762     // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2763     // From that, we know that they must have the same value, since they must
2764     // have come from the same COPY.
2765     return I1->isIdenticalTo(*I2);
2766   }
2767 
2768   // We don't have any physical registers, so we don't necessarily need the
2769   // same vreg defs.
2770   //
2771   // On the off-chance that there's some target instruction feeding into the
2772   // instruction, let's use produceSameValue instead of isIdenticalTo.
2773   if (Builder.getTII().produceSameValue(*I1, *I2, &MRI)) {
2774     // Handle instructions with multiple defs that produce the same values.
2775     // The values are the same for operands with the same index, e.g.:
2776     // %0:_(s8), %1:_(s8), %2:_(s8), %3:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2777     // %5:_(s8), %6:_(s8), %7:_(s8), %8:_(s8) = G_UNMERGE_VALUES %4:_(<4 x s8>)
2778     // Here I1 and I2 are different instructions that produce the same values:
2779     // %1 and %6 are the same value, but %1 and %7 are not.
2780     return I1->findRegisterDefOperandIdx(InstAndDef1->Reg) ==
2781            I2->findRegisterDefOperandIdx(InstAndDef2->Reg);
2782   }
2783   return false;
2784 }
2785 
2786 bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2787   if (!MOP.isReg())
2788     return false;
2789   // MIPatternMatch doesn't let us look through G_ZEXT etc.
2790   auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
2791   return ValAndVReg && ValAndVReg->Value == C;
2792 }
2793 
2794 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2795                                                      unsigned OpIdx) {
2796   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2797   Register OldReg = MI.getOperand(0).getReg();
2798   Register Replacement = MI.getOperand(OpIdx).getReg();
2799   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2800   MI.eraseFromParent();
2801   replaceRegWith(MRI, OldReg, Replacement);
2802   return true;
2803 }
2804 
2805 bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2806                                                  Register Replacement) {
2807   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2808   Register OldReg = MI.getOperand(0).getReg();
2809   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2810   MI.eraseFromParent();
2811   replaceRegWith(MRI, OldReg, Replacement);
2812   return true;
2813 }
2814 
2815 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
2816   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2817   // Match (cond ? x : x)
2818   return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2819          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2820                        MRI);
2821 }
2822 
2823 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
2824   return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2825          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2826                        MRI);
2827 }
2828 
2829 bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
2830   return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2831          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2832                        MRI);
2833 }
2834 
2835 bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
2836   MachineOperand &MO = MI.getOperand(OpIdx);
2837   return MO.isReg() &&
2838          getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2839 }
2840 
2841 bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2842                                                         unsigned OpIdx) {
2843   MachineOperand &MO = MI.getOperand(OpIdx);
2844   return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2845 }
2846 
2847 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
2848   assert(MI.getNumDefs() == 1 && "Expected only one def?");
2849   Builder.setInstr(MI);
2850   Builder.buildFConstant(MI.getOperand(0), C);
2851   MI.eraseFromParent();
2852   return true;
2853 }
2854 
2855 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2856   assert(MI.getNumDefs() == 1 && "Expected only one def?");
2857   Builder.setInstr(MI);
2858   Builder.buildConstant(MI.getOperand(0), C);
2859   MI.eraseFromParent();
2860   return true;
2861 }
2862 
2863 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, APInt C) {
2864   assert(MI.getNumDefs() == 1 && "Expected only one def?");
2865   Builder.setInstr(MI);
2866   Builder.buildConstant(MI.getOperand(0), C);
2867   MI.eraseFromParent();
2868   return true;
2869 }
2870 
2871 bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
2872   assert(MI.getNumDefs() == 1 && "Expected only one def?");
2873   Builder.setInstr(MI);
2874   Builder.buildUndef(MI.getOperand(0));
2875   MI.eraseFromParent();
2876   return true;
2877 }
2878 
2879 bool CombinerHelper::matchSimplifyAddToSub(
2880     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2881   Register LHS = MI.getOperand(1).getReg();
2882   Register RHS = MI.getOperand(2).getReg();
2883   Register &NewLHS = std::get<0>(MatchInfo);
2884   Register &NewRHS = std::get<1>(MatchInfo);
2885 
2886   // Helper lambda to check for opportunities for
2887   // ((0-A) + B) -> B - A
2888   // (A + (0-B)) -> A - B
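  //
  // E.g.
  //   %neg:_(s32) = G_SUB %zero, %a
  //   %add:_(s32) = G_ADD %neg, %b
  // becomes
  //   %add:_(s32) = G_SUB %b, %a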
2889   auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2890     if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2891       return false;
2892     NewLHS = MaybeNewLHS;
2893     return true;
2894   };
2895 
2896   return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2897 }
2898 
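/// Try to fold a chain of G_INSERT_VECTOR_ELTs with constant indices, rooted
/// at either a G_BUILD_VECTOR or a G_IMPLICIT_DEF, into a single
/// G_BUILD_VECTOR. On success, \p MatchInfo holds one source register per
/// destination lane; lanes that were never written stay invalid and become
/// undef in applyCombineInsertVecElts.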
2899 bool CombinerHelper::matchCombineInsertVecElts(
2900     MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2901   assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2902          "Invalid opcode");
2903   Register DstReg = MI.getOperand(0).getReg();
2904   LLT DstTy = MRI.getType(DstReg);
2905   assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2906   unsigned NumElts = DstTy.getNumElements();
2907   // If this MI is part of a sequence of insert_vec_elts, then
2908   // don't do the combine in the middle of the sequence.
2909   if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2910                                    TargetOpcode::G_INSERT_VECTOR_ELT)
2911     return false;
2912   MachineInstr *CurrInst = &MI;
2913   MachineInstr *TmpInst;
2914   int64_t IntImm;
2915   Register TmpReg;
2916   MatchInfo.resize(NumElts);
2917   while (mi_match(
2918       CurrInst->getOperand(0).getReg(), MRI,
2919       m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
2920     if (IntImm >= NumElts || IntImm < 0)
2921       return false;
2922     if (!MatchInfo[IntImm])
2923       MatchInfo[IntImm] = TmpReg;
2924     CurrInst = TmpInst;
2925   }
2926   // Variable index.
2927   if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2928     return false;
2929   if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2930     for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
2931       if (!MatchInfo[I - 1].isValid())
2932         MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
2933     }
2934     return true;
2935   }
2936   // If we didn't end in a G_IMPLICIT_DEF, bail out.
2937   return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
2938 }
2939 
2940 void CombinerHelper::applyCombineInsertVecElts(
2941     MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2942   Builder.setInstr(MI);
2943   Register UndefReg;
2944   auto GetUndef = [&]() {
2945     if (UndefReg)
2946       return UndefReg;
2947     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2948     UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
2949     return UndefReg;
2950   };
2951   for (unsigned I = 0; I < MatchInfo.size(); ++I) {
2952     if (!MatchInfo[I])
2953       MatchInfo[I] = GetUndef();
2954   }
2955   Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
2956   MI.eraseFromParent();
2957 }
2958 
2959 void CombinerHelper::applySimplifyAddToSub(
2960     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2961   Builder.setInstr(MI);
2962   Register SubLHS, SubRHS;
2963   std::tie(SubLHS, SubRHS) = MatchInfo;
2964   Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
2965   MI.eraseFromParent();
2966 }
2967 
2968 bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
2969     MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
2970   // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
2971   //
2972   // Creates the new hand + logic instructions (but does not insert them).
2973   //
2974   // On success, MatchInfo is populated with the new instructions. These are
2975   // inserted in applyHoistLogicOpWithSameOpcodeHands.
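  //
  // E.g. for the extension case:
  //   %x_wide = G_ZEXT %x
  //   %y_wide = G_ZEXT %y
  //   %dst    = G_AND %x_wide, %y_wide
  // becomes
  //   %narrow = G_AND %x, %y
  //   %dst    = G_ZEXT %narrow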
2976   unsigned LogicOpcode = MI.getOpcode();
2977   assert(LogicOpcode == TargetOpcode::G_AND ||
2978          LogicOpcode == TargetOpcode::G_OR ||
2979          LogicOpcode == TargetOpcode::G_XOR);
2980   MachineIRBuilder MIB(MI);
2981   Register Dst = MI.getOperand(0).getReg();
2982   Register LHSReg = MI.getOperand(1).getReg();
2983   Register RHSReg = MI.getOperand(2).getReg();
2984 
2985   // Don't recompute anything.
2986   if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
2987     return false;
2988 
2989   // Make sure we have (hand x, ...), (hand y, ...)
2990   MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
2991   MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
2992   if (!LeftHandInst || !RightHandInst)
2993     return false;
2994   unsigned HandOpcode = LeftHandInst->getOpcode();
2995   if (HandOpcode != RightHandInst->getOpcode())
2996     return false;
2997   if (!LeftHandInst->getOperand(1).isReg() ||
2998       !RightHandInst->getOperand(1).isReg())
2999     return false;
3000 
3001   // Make sure the types match up, and if we're doing this post-legalization,
3002   // we end up with legal types.
3003   Register X = LeftHandInst->getOperand(1).getReg();
3004   Register Y = RightHandInst->getOperand(1).getReg();
3005   LLT XTy = MRI.getType(X);
3006   LLT YTy = MRI.getType(Y);
3007   if (XTy != YTy)
3008     return false;
3009   if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
3010     return false;
3011 
3012   // Optional extra source register.
3013   Register ExtraHandOpSrcReg;
3014   switch (HandOpcode) {
3015   default:
3016     return false;
3017   case TargetOpcode::G_ANYEXT:
3018   case TargetOpcode::G_SEXT:
3019   case TargetOpcode::G_ZEXT: {
3020     // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
3021     break;
3022   }
3023   case TargetOpcode::G_AND:
3024   case TargetOpcode::G_ASHR:
3025   case TargetOpcode::G_LSHR:
3026   case TargetOpcode::G_SHL: {
3027     // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
3028     MachineOperand &ZOp = LeftHandInst->getOperand(2);
3029     if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
3030       return false;
3031     ExtraHandOpSrcReg = ZOp.getReg();
3032     break;
3033   }
3034   }
3035 
3036   // Record the steps to build the new instructions.
3037   //
3038   // Steps to build (logic x, y)
3039   auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
3040   OperandBuildSteps LogicBuildSteps = {
3041       [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
3042       [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
3043       [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
3044   InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
3045 
3046   // Steps to build hand (logic x, y), ...z
3047   OperandBuildSteps HandBuildSteps = {
3048       [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
3049       [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
3050   if (ExtraHandOpSrcReg.isValid())
3051     HandBuildSteps.push_back(
3052         [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
3053   InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
3054 
3055   MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
3056   return true;
3057 }
3058 
3059 void CombinerHelper::applyBuildInstructionSteps(
3060     MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
3061   assert(MatchInfo.InstrsToBuild.size() &&
3062          "Expected at least one instr to build?");
3063   Builder.setInstr(MI);
3064   for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
3065     assert(InstrToBuild.Opcode && "Expected a valid opcode?");
3066     assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
3067     MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
3068     for (auto &OperandFn : InstrToBuild.OperandFns)
3069       OperandFn(Instr);
3070   }
3071   MI.eraseFromParent();
3072 }
3073 
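/// Match (ashr (shl x, C), C) and convert it to a G_SEXT_INREG of width
/// (scalar size - C). E.g. on s32, (ashr (shl x, 24), 24) becomes
/// (sext_inreg x, 8).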
3074 bool CombinerHelper::matchAshrShlToSextInreg(
3075     MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3076   assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3077   int64_t ShlCst, AshrCst;
3078   Register Src;
3079   // FIXME: detect splat constant vectors.
3080   if (!mi_match(MI.getOperand(0).getReg(), MRI,
3081                 m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
3082     return false;
3083   if (ShlCst != AshrCst)
3084     return false;
3085   if (!isLegalOrBeforeLegalizer(
3086           {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
3087     return false;
3088   MatchInfo = std::make_tuple(Src, ShlCst);
3089   return true;
3090 }
3091 
3092 void CombinerHelper::applyAshShlToSextInreg(
3093     MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
3094   assert(MI.getOpcode() == TargetOpcode::G_ASHR);
3095   Register Src;
3096   int64_t ShiftAmt;
3097   std::tie(Src, ShiftAmt) = MatchInfo;
3098   unsigned Size = MRI.getType(Src).getScalarSizeInBits();
3099   Builder.setInstrAndDebugLoc(MI);
3100   Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
3101   MI.eraseFromParent();
3102 }
3103 
3104 /// and(and(x, C1), C2) -> C1&C2 ? and(x, C1&C2) : 0
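/// E.g. and(and(x, 0xF0), 0x3C) -> and(x, 0x30), while
/// and(and(x, 0xF0), 0x0F) -> 0.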
3105 bool CombinerHelper::matchOverlappingAnd(
3106     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3107   assert(MI.getOpcode() == TargetOpcode::G_AND);
3108 
3109   Register Dst = MI.getOperand(0).getReg();
3110   LLT Ty = MRI.getType(Dst);
3111 
3112   Register R;
3113   int64_t C1;
3114   int64_t C2;
3115   if (!mi_match(
3116           Dst, MRI,
3117           m_GAnd(m_GAnd(m_Reg(R), m_ICst(C1)), m_ICst(C2))))
3118     return false;
3119 
3120   MatchInfo = [=](MachineIRBuilder &B) {
3121     if (C1 & C2) {
3122       B.buildAnd(Dst, R, B.buildConstant(Ty, C1 & C2));
3123       return;
3124     }
3125     auto Zero = B.buildConstant(Ty, 0);
3126     replaceRegWith(MRI, Dst, Zero->getOperand(0).getReg());
3127   };
3128   return true;
3129 }
3130 
3131 bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
3132                                        Register &Replacement) {
3133   // Given
3134   //
3135   // %y:_(sN) = G_SOMETHING
3136   // %x:_(sN) = G_SOMETHING
3137   // %res:_(sN) = G_AND %x, %y
3138   //
3139   // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3140   //
3141   // Patterns like this can appear as a result of legalization. E.g.
3142   //
3143   // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3144   // %one:_(s32) = G_CONSTANT i32 1
3145   // %and:_(s32) = G_AND %cmp, %one
3146   //
3147   // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3148   assert(MI.getOpcode() == TargetOpcode::G_AND);
3149   if (!KB)
3150     return false;
3151 
3152   Register AndDst = MI.getOperand(0).getReg();
3153   LLT DstTy = MRI.getType(AndDst);
3154 
3155   // FIXME: This should be removed once GISelKnownBits supports vectors.
3156   if (DstTy.isVector())
3157     return false;
3158 
3159   Register LHS = MI.getOperand(1).getReg();
3160   Register RHS = MI.getOperand(2).getReg();
3161   KnownBits LHSBits = KB->getKnownBits(LHS);
3162   KnownBits RHSBits = KB->getKnownBits(RHS);
3163 
3164   // Check that x & Mask == x.
3165   // x & 1 == x, always
3166   // x & 0 == x, only if x is also 0
3167   // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3168   //
3169   // Check if we can replace AndDst with the LHS of the G_AND
3170   if (canReplaceReg(AndDst, LHS, MRI) &&
3171       (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
3172     Replacement = LHS;
3173     return true;
3174   }
3175 
3176   // Check if we can replace AndDst with the RHS of the G_AND
3177   if (canReplaceReg(AndDst, RHS, MRI) &&
3178       (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
3179     Replacement = RHS;
3180     return true;
3181   }
3182 
3183   return false;
3184 }
3185 
3186 bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
3187   // Given
3188   //
3189   // %y:_(sN) = G_SOMETHING
3190   // %x:_(sN) = G_SOMETHING
3191   // %res:_(sN) = G_OR %x, %y
3192   //
3193   // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3194   assert(MI.getOpcode() == TargetOpcode::G_OR);
3195   if (!KB)
3196     return false;
3197 
3198   Register OrDst = MI.getOperand(0).getReg();
3199   LLT DstTy = MRI.getType(OrDst);
3200 
3201   // FIXME: This should be removed once GISelKnownBits supports vectors.
3202   if (DstTy.isVector())
3203     return false;
3204 
3205   Register LHS = MI.getOperand(1).getReg();
3206   Register RHS = MI.getOperand(2).getReg();
3207   KnownBits LHSBits = KB->getKnownBits(LHS);
3208   KnownBits RHSBits = KB->getKnownBits(RHS);
3209 
3210   // Check that x | Mask == x.
3211   // x | 0 == x, always
3212   // x | 1 == x, only if x is also 1
3213   // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3214   //
3215   // Check if we can replace OrDst with the LHS of the G_OR
3216   if (canReplaceReg(OrDst, LHS, MRI) &&
3217       (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
3218     Replacement = LHS;
3219     return true;
3220   }
3221 
3222   // Check if we can replace OrDst with the RHS of the G_OR
3223   if (canReplaceReg(OrDst, RHS, MRI) &&
3224       (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
3225     Replacement = RHS;
3226     return true;
3227   }
3228 
3229   return false;
3230 }
3231 
3232 bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3233   // If the input is already sign extended, just drop the extension.
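  // A G_SEXT_INREG of width ExtBits is a no-op when the top
  // (TypeSize - ExtBits + 1) bits of the input are already all copies of the
  // sign bit, which is exactly what the computeNumSignBits check below tests.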
3234   Register Src = MI.getOperand(1).getReg();
3235   unsigned ExtBits = MI.getOperand(2).getImm();
3236   unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3237   return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3238 }
3239 
3240 static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3241                              int64_t Cst, bool IsVector, bool IsFP) {
3242   // For i1, Cst will always be -1 regardless of boolean contents.
3243   return (ScalarSizeBits == 1 && Cst == -1) ||
3244          isConstTrueVal(TLI, Cst, IsVector, IsFP);
3245 }
3246 
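/// Match a G_XOR of a comparison tree with a "true" constant (as defined by
/// the target's boolean contents). The tree may combine G_ICMP/G_FCMP results
/// with G_AND/G_OR. On success, \p RegsToNegate lists the registers whose
/// defining instructions applyNotCmp will invert (comparison predicates
/// flipped, G_AND and G_OR swapped) so that the xor can be removed.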
3247 bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3248                                  SmallVectorImpl<Register> &RegsToNegate) {
3249   assert(MI.getOpcode() == TargetOpcode::G_XOR);
3250   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3251   const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3252   Register XorSrc;
3253   Register CstReg;
3254   // We match xor(src, true) here.
3255   if (!mi_match(MI.getOperand(0).getReg(), MRI,
3256                 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3257     return false;
3258 
3259   if (!MRI.hasOneNonDBGUse(XorSrc))
3260     return false;
3261 
3262   // Check that XorSrc is the root of a tree of comparisons combined with ANDs
3263   // and ORs. The suffix of RegsToNegate starting from index I is used as a
3264   // work list of tree nodes to visit.
3265   RegsToNegate.push_back(XorSrc);
3266   // Remember whether the comparisons are all integer or all floating point.
3267   bool IsInt = false;
3268   bool IsFP = false;
3269   for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3270     Register Reg = RegsToNegate[I];
3271     if (!MRI.hasOneNonDBGUse(Reg))
3272       return false;
3273     MachineInstr *Def = MRI.getVRegDef(Reg);
3274     switch (Def->getOpcode()) {
3275     default:
3276       // Don't match if the tree contains anything other than ANDs, ORs and
3277       // comparisons.
3278       return false;
3279     case TargetOpcode::G_ICMP:
3280       if (IsFP)
3281         return false;
3282       IsInt = true;
3283       // When we apply the combine we will invert the predicate.
3284       break;
3285     case TargetOpcode::G_FCMP:
3286       if (IsInt)
3287         return false;
3288       IsFP = true;
3289       // When we apply the combine we will invert the predicate.
3290       break;
3291     case TargetOpcode::G_AND:
3292     case TargetOpcode::G_OR:
3293       // Implement De Morgan's laws:
3294       // ~(x & y) -> ~x | ~y
3295       // ~(x | y) -> ~x & ~y
3296       // When we apply the combine we will change the opcode and recursively
3297       // negate the operands.
3298       RegsToNegate.push_back(Def->getOperand(1).getReg());
3299       RegsToNegate.push_back(Def->getOperand(2).getReg());
3300       break;
3301     }
3302   }
3303 
3304   // Now that we know whether the comparisons are integer or floating point,
3305   // check the constant in the xor.
3306   int64_t Cst;
3307   if (Ty.isVector()) {
3308     MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3309     auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
3310     if (!MaybeCst)
3311       return false;
3312     if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3313       return false;
3314   } else {
3315     if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3316       return false;
3317     if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3318       return false;
3319   }
3320 
3321   return true;
3322 }
3323 
3324 void CombinerHelper::applyNotCmp(MachineInstr &MI,
3325                                  SmallVectorImpl<Register> &RegsToNegate) {
3326   for (Register Reg : RegsToNegate) {
3327     MachineInstr *Def = MRI.getVRegDef(Reg);
3328     Observer.changingInstr(*Def);
3329     // For each comparison, invert the opcode. For each AND and OR, change the
3330     // opcode.
3331     switch (Def->getOpcode()) {
3332     default:
3333       llvm_unreachable("Unexpected opcode");
3334     case TargetOpcode::G_ICMP:
3335     case TargetOpcode::G_FCMP: {
3336       MachineOperand &PredOp = Def->getOperand(1);
3337       CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3338           (CmpInst::Predicate)PredOp.getPredicate());
3339       PredOp.setPredicate(NewP);
3340       break;
3341     }
3342     case TargetOpcode::G_AND:
3343       Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3344       break;
3345     case TargetOpcode::G_OR:
3346       Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3347       break;
3348     }
3349     Observer.changedInstr(*Def);
3350   }
3351 
3352   replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3353   MI.eraseFromParent();
3354 }
3355 
3356 bool CombinerHelper::matchXorOfAndWithSameReg(
3357     MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3358   // Match (xor (and x, y), y) (or any of its commuted cases)
3359   assert(MI.getOpcode() == TargetOpcode::G_XOR);
3360   Register &X = MatchInfo.first;
3361   Register &Y = MatchInfo.second;
3362   Register AndReg = MI.getOperand(1).getReg();
3363   Register SharedReg = MI.getOperand(2).getReg();
3364 
3365   // Find a G_AND on either side of the G_XOR.
3366   // Look for one of
3367   //
3368   // (xor (and x, y), SharedReg)
3369   // (xor SharedReg, (and x, y))
3370   if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3371     std::swap(AndReg, SharedReg);
3372     if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3373       return false;
3374   }
3375 
3376   // Only do this if we'll eliminate the G_AND.
3377   if (!MRI.hasOneNonDBGUse(AndReg))
3378     return false;
3379 
3380   // We can combine if SharedReg is the same as either the LHS or RHS of the
3381   // G_AND.
3382   if (Y != SharedReg)
3383     std::swap(X, Y);
3384   return Y == SharedReg;
3385 }
3386 
3387 void CombinerHelper::applyXorOfAndWithSameReg(
3388     MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3389   // Fold (xor (and x, y), y) -> (and (not x), y)
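  // This holds because, bit by bit, (x & y) ^ y is 0 wherever y is 0 and ~x
  // wherever y is 1, i.e. exactly (~x) & y.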
3390   Builder.setInstrAndDebugLoc(MI);
3391   Register X, Y;
3392   std::tie(X, Y) = MatchInfo;
3393   auto Not = Builder.buildNot(MRI.getType(X), X);
3394   Observer.changingInstr(MI);
3395   MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3396   MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3397   MI.getOperand(2).setReg(Y);
3398   Observer.changedInstr(MI);
3399 }
3400 
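/// Match a G_PTR_ADD whose base pointer is zero (or a vector of zeros) in an
/// integral address space; applyPtrAddZero rewrites it as a G_INTTOPTR of the
/// offset.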
3401 bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
3402   auto &PtrAdd = cast<GPtrAdd>(MI);
3403   Register DstReg = PtrAdd.getReg(0);
3404   LLT Ty = MRI.getType(DstReg);
3405   const DataLayout &DL = Builder.getMF().getDataLayout();
3406 
3407   if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3408     return false;
3409 
3410   if (Ty.isPointer()) {
3411     auto ConstVal = getConstantVRegVal(PtrAdd.getBaseReg(), MRI);
3412     return ConstVal && *ConstVal == 0;
3413   }
3414 
3415   assert(Ty.isVector() && "Expecting a vector type");
3416   const MachineInstr *VecMI = MRI.getVRegDef(PtrAdd.getBaseReg());
3417   return isBuildVectorAllZeros(*VecMI, MRI);
3418 }
3419 
3420 void CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
3421   auto &PtrAdd = cast<GPtrAdd>(MI);
3422   Builder.setInstrAndDebugLoc(PtrAdd);
3423   Builder.buildIntToPtr(PtrAdd.getReg(0), PtrAdd.getOffsetReg());
3424   PtrAdd.eraseFromParent();
3425 }
3426 
3427 /// The second source operand is known to be a power of 2.
3428 void CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
3429   Register DstReg = MI.getOperand(0).getReg();
3430   Register Src0 = MI.getOperand(1).getReg();
3431   Register Pow2Src1 = MI.getOperand(2).getReg();
3432   LLT Ty = MRI.getType(DstReg);
3433   Builder.setInstrAndDebugLoc(MI);
3434 
3435   // Fold (urem x, pow2) -> (and x, pow2-1)
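  // E.g. (urem x, 8) -> (and x, 7).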
3436   auto NegOne = Builder.buildConstant(Ty, -1);
3437   auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3438   Builder.buildAnd(DstReg, Src0, Add);
3439   MI.eraseFromParent();
3440 }
3441 
3442 Optional<SmallVector<Register, 8>>
3443 CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3444   assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3445   // We want to detect if Root is part of a tree which represents a bunch
3446   // of loads being merged into a larger load. We'll try to recognize patterns
3447   // like, for example:
3448   //
3449   //  Reg   Reg
3450   //   \    /
3451   //    OR_1   Reg
3452   //     \    /
3453   //      OR_2
3454   //        \     Reg
3455   //         .. /
3456   //        Root
3457   //
3458   //  Reg   Reg   Reg   Reg
3459   //     \ /       \   /
3460   //     OR_1      OR_2
3461   //       \       /
3462   //        \    /
3463   //         ...
3464   //         Root
3465   //
3466   // Each "Reg" may have been produced by a load + some arithmetic. This
3467   // function will save each of them.
3468   SmallVector<Register, 8> RegsToVisit;
3469   SmallVector<const MachineInstr *, 7> Ors = {Root};
3470 
3471   // In the "worst" case, we're dealing with a load for each byte. So, there
3472   // are at most #bytes - 1 ORs.
3473   const unsigned MaxIter =
3474       MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3475   for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3476     if (Ors.empty())
3477       break;
3478     const MachineInstr *Curr = Ors.pop_back_val();
3479     Register OrLHS = Curr->getOperand(1).getReg();
3480     Register OrRHS = Curr->getOperand(2).getReg();
3481 
3482     // In the combine, we want to eliminate the entire tree.
3483     if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3484       return None;
3485 
3486     // If it's a G_OR, save it and continue to walk. If it's not, then it's
3487     // something that may be a load + arithmetic.
3488     if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3489       Ors.push_back(Or);
3490     else
3491       RegsToVisit.push_back(OrLHS);
3492     if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3493       Ors.push_back(Or);
3494     else
3495       RegsToVisit.push_back(OrRHS);
3496   }
3497 
3498   // We're going to try and merge each register into a wider power-of-2 type,
3499   // so we ought to have an even number of registers.
3500   if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3501     return None;
3502   return RegsToVisit;
3503 }
3504 
3505 /// Helper function for findLoadOffsetsForLoadOrCombine.
3506 ///
3507 /// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3508 /// and then moving that value into a specific byte offset.
3509 ///
3510 /// e.g. x[i] << 24
3511 ///
3512 /// \returns The load instruction and the byte offset it is moved into.
3513 static Optional<std::pair<GZExtLoad *, int64_t>>
3514 matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3515                          const MachineRegisterInfo &MRI) {
3516   assert(MRI.hasOneNonDBGUse(Reg) &&
3517          "Expected Reg to only have one non-debug use?");
3518   Register MaybeLoad;
3519   int64_t Shift;
3520   if (!mi_match(Reg, MRI,
3521                 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3522     Shift = 0;
3523     MaybeLoad = Reg;
3524   }
3525 
3526   if (Shift % MemSizeInBits != 0)
3527     return None;
3528 
3529   // TODO: Handle other types of loads.
3530   auto *Load = getOpcodeDef<GZExtLoad>(MaybeLoad, MRI);
3531   if (!Load)
3532     return None;
3533 
3534   if (!Load->isUnordered() || Load->getMemSizeInBits() != MemSizeInBits)
3535     return None;
3536 
3537   return std::make_pair(Load, Shift / MemSizeInBits);
3538 }
3539 
3540 Optional<std::tuple<GZExtLoad *, int64_t, GZExtLoad *>>
3541 CombinerHelper::findLoadOffsetsForLoadOrCombine(
3542     SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
3543     const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3544 
3545   // Each load found for the pattern. There should be one for each RegsToVisit.
3546   SmallSetVector<const MachineInstr *, 8> Loads;
3547 
3548   // The lowest index used in any load. (The lowest "i" for each x[i].)
3549   int64_t LowestIdx = INT64_MAX;
3550 
3551   // The load which uses the lowest index.
3552   GZExtLoad *LowestIdxLoad = nullptr;
3553 
3554   // Keeps track of the load indices we see. We shouldn't see any indices twice.
3555   SmallSet<int64_t, 8> SeenIdx;
3556 
3557   // Ensure each load is in the same MBB.
3558   // TODO: Support multiple MachineBasicBlocks.
3559   MachineBasicBlock *MBB = nullptr;
3560   const MachineMemOperand *MMO = nullptr;
3561 
3562   // Earliest instruction-order load in the pattern.
3563   GZExtLoad *EarliestLoad = nullptr;
3564 
3565   // Latest instruction-order load in the pattern.
3566   GZExtLoad *LatestLoad = nullptr;
3567 
3568   // Base pointer which every load should share.
3569   Register BasePtr;
3570 
3571   // We want to find a load for each register. Each load should have some
3572   // appropriate bit twiddling arithmetic. During this loop, we will also keep
3573   // track of the load which uses the lowest index. Later, we will check if we
3574   // can use its pointer in the final, combined load.
3575   for (auto Reg : RegsToVisit) {
3576     // Find the load, and find the byte position that its value will end up
3577     // at in the final (possibly shifted) value.
3578     auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3579     if (!LoadAndPos)
3580       return None;
3581     GZExtLoad *Load;
3582     int64_t DstPos;
3583     std::tie(Load, DstPos) = *LoadAndPos;
3584 
3585     // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3586     // it is difficult to check for stores/calls/etc between loads.
3587     MachineBasicBlock *LoadMBB = Load->getParent();
3588     if (!MBB)
3589       MBB = LoadMBB;
3590     if (LoadMBB != MBB)
3591       return None;
3592 
3593     // Make sure that the MachineMemOperands of every seen load are compatible.
3594     auto &LoadMMO = Load->getMMO();
3595     if (!MMO)
3596       MMO = &LoadMMO;
3597     if (MMO->getAddrSpace() != LoadMMO.getAddrSpace())
3598       return None;
3599 
3600     // Find out what the base pointer and index for the load is.
3601     Register LoadPtr;
3602     int64_t Idx;
3603     if (!mi_match(Load->getOperand(1).getReg(), MRI,
3604                   m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3605       LoadPtr = Load->getOperand(1).getReg();
3606       Idx = 0;
3607     }
3608 
3609     // Don't combine things like a[i], a[i] -> a bigger load.
3610     if (!SeenIdx.insert(Idx).second)
3611       return None;
3612 
3613     // Every load must share the same base pointer; don't combine things like:
3614     //
3615     // a[i], b[i + 1] -> a bigger load.
3616     if (!BasePtr.isValid())
3617       BasePtr = LoadPtr;
3618     if (BasePtr != LoadPtr)
3619       return None;
3620 
3621     if (Idx < LowestIdx) {
3622       LowestIdx = Idx;
3623       LowestIdxLoad = Load;
3624     }
3625 
3626     // Keep track of the byte offset that this load ends up at. If we have seen
3627     // the byte offset, then stop here. We do not want to combine:
3628     //
3629     // a[i] << 16, a[i + k] << 16 -> a bigger load.
3630     if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3631       return None;
3632     Loads.insert(Load);
3633 
3634     // Keep track of the position of the earliest/latest loads in the pattern.
3635     // We will check that there are no load fold barriers between them later
3636     // on.
3637     //
3638     // FIXME: Is there a better way to check for load fold barriers?
3639     if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3640       EarliestLoad = Load;
3641     if (!LatestLoad || dominates(*LatestLoad, *Load))
3642       LatestLoad = Load;
3643   }
3644 
3645   // We found a load for each register. Let's check if each load satisfies the
3646   // pattern.
3647   assert(Loads.size() == RegsToVisit.size() &&
3648          "Expected to find a load for each register?");
3649   assert(EarliestLoad != LatestLoad && EarliestLoad &&
3650          LatestLoad && "Expected at least two loads?");
3651 
3652   // Check if there are any stores, calls, etc. between any of the loads. If
3653   // there are, then we can't safely perform the combine.
3654   //
3655   // MaxIter is chosen based off the (worst case) number of iterations it
3656   // typically takes to succeed in the LLVM test suite plus some padding.
3657   //
3658   // FIXME: Is there a better way to check for load fold barriers?
3659   const unsigned MaxIter = 20;
3660   unsigned Iter = 0;
3661   for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3662                                                  LatestLoad->getIterator())) {
3663     if (Loads.count(&MI))
3664       continue;
3665     if (MI.isLoadFoldBarrier())
3666       return None;
3667     if (Iter++ == MaxIter)
3668       return None;
3669   }
3670 
3671   return std::make_tuple(LowestIdxLoad, LowestIdx, LatestLoad);
3672 }
3673 
3674 bool CombinerHelper::matchLoadOrCombine(
3675     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3676   assert(MI.getOpcode() == TargetOpcode::G_OR);
3677   MachineFunction &MF = *MI.getMF();
3678   // Assuming a little-endian target, transform:
3679   //  s8 *a = ...
3680   //  s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3681   // =>
3682   //  s32 val = *((i32)a)
3683   //
3684   //  s8 *a = ...
3685   //  s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3686   // =>
3687   //  s32 val = BSWAP(*((s32)a))
3688   Register Dst = MI.getOperand(0).getReg();
3689   LLT Ty = MRI.getType(Dst);
3690   if (Ty.isVector())
3691     return false;
3692 
3693   // We need to combine at least two loads into this type. Since the smallest
3694   // possible load is into a byte, we need at least a 16-bit wide type.
3695   const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3696   if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3697     return false;
3698 
3699   // Match a collection of non-OR instructions in the pattern.
3700   auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3701   if (!RegsToVisit)
3702     return false;
3703 
3704   // We have a collection of non-OR instructions. Figure out how wide each of
3705   // the small loads should be based off of the number of potential loads we
3706   // found.
3707   const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3708   if (NarrowMemSizeInBits % 8 != 0)
3709     return false;
3710 
3711   // Check if each register feeding into each OR is a load from the same
3712   // base pointer + some arithmetic.
3713   //
3714   // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3715   //
3716   // Also verify that each of these ends up putting a[i] into the same memory
3717   // offset as a load into a wide type would.
3718   SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
3719   GZExtLoad *LowestIdxLoad, *LatestLoad;
3720   int64_t LowestIdx;
3721   auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3722       MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3723   if (!MaybeLoadInfo)
3724     return false;
3725   std::tie(LowestIdxLoad, LowestIdx, LatestLoad) = *MaybeLoadInfo;
3726 
3727   // We have a bunch of loads being OR'd together. Using the addresses + offsets
3728   // we found before, check if this corresponds to a big or little endian byte
3729   // pattern. If it does, then we can represent it using a load + possibly a
3730   // BSWAP.
3731   bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3732   Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3733   if (!IsBigEndian.hasValue())
3734     return false;
3735   bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3736   if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3737     return false;
3738 
3739   // Make sure that the load from the lowest index produces offset 0 in the
3740   // final value.
3741   //
3742   // This ensures that we won't combine something like this:
3743   //
3744   // load x[i] -> byte 2
3745   // load x[i+1] -> byte 0 ---> wide_load x[i]
3746   // load x[i+2] -> byte 1
3747   const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3748   const unsigned ZeroByteOffset =
3749       *IsBigEndian
3750           ? bigEndianByteAt(NumLoadsInTy, 0)
3751           : littleEndianByteAt(NumLoadsInTy, 0);
3752   auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3753   if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3754       ZeroOffsetIdx->second != LowestIdx)
3755     return false;
3756 
3757   // We will reuse the pointer from the load which ends up at byte offset 0. It
3758   // may not use index 0.
3759   Register Ptr = LowestIdxLoad->getPointerReg();
3760   const MachineMemOperand &MMO = LowestIdxLoad->getMMO();
3761   LegalityQuery::MemDesc MMDesc;
3762   MMDesc.MemoryTy = Ty;
3763   MMDesc.AlignInBits = MMO.getAlign().value() * 8;
3764   MMDesc.Ordering = MMO.getSuccessOrdering();
3765   if (!isLegalOrBeforeLegalizer(
3766           {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3767     return false;
3768   auto PtrInfo = MMO.getPointerInfo();
3769   auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3770 
3771   // Load must be allowed and fast on the target.
3772   LLVMContext &C = MF.getFunction().getContext();
3773   auto &DL = MF.getDataLayout();
3774   bool Fast = false;
3775   if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3776       !Fast)
3777     return false;
3778 
3779   MatchInfo = [=](MachineIRBuilder &MIB) {
3780     MIB.setInstrAndDebugLoc(*LatestLoad);
3781     Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3782     MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3783     if (NeedsBSwap)
3784       MIB.buildBSwap(Dst, LoadDst);
3785   };
3786   return true;
3787 }
3788 
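/// Match a G_PHI whose only non-debug use is a G_SEXT, G_ZEXT, or G_ANYEXT.
/// If propagating the extend looks profitable, applyExtendThroughPhis builds
/// an extend after each incoming value's definition and creates a new phi of
/// the wider type that takes over the extend's result, leaving the original
/// phi for dead-code elimination.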
3789 bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
3790                                             MachineInstr *&ExtMI) {
3791   assert(MI.getOpcode() == TargetOpcode::G_PHI);
3792 
3793   Register DstReg = MI.getOperand(0).getReg();
3794 
3795   // TODO: Extending a vector may be expensive; don't do this until the
3796   // heuristics are better.
3797   if (MRI.getType(DstReg).isVector())
3798     return false;
3799 
3800   // Try to match a phi whose only use is an extend.
3801   if (!MRI.hasOneNonDBGUse(DstReg))
3802     return false;
3803   ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3804   switch (ExtMI->getOpcode()) {
3805   case TargetOpcode::G_ANYEXT:
3806     return true; // G_ANYEXT is usually free.
3807   case TargetOpcode::G_ZEXT:
3808   case TargetOpcode::G_SEXT:
3809     break;
3810   default:
3811     return false;
3812   }
3813 
3814   // If the target is likely to fold this extend away, don't propagate.
3815   if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
3816     return false;
3817 
3818   // We don't want to propagate the extends unless there's a good chance that
3819   // they'll be optimized in some way.
3820   // Collect the unique incoming values.
3821   SmallPtrSet<MachineInstr *, 4> InSrcs;
3822   for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
3823     auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
3824     switch (DefMI->getOpcode()) {
3825     case TargetOpcode::G_LOAD:
3826     case TargetOpcode::G_TRUNC:
3827     case TargetOpcode::G_SEXT:
3828     case TargetOpcode::G_ZEXT:
3829     case TargetOpcode::G_ANYEXT:
3830     case TargetOpcode::G_CONSTANT:
3831       InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
3832       // Don't try to propagate if there are too many places to create new
3833       // extends; chances are it'll increase code size.
3834       if (InSrcs.size() > 2)
3835         return false;
3836       break;
3837     default:
3838       return false;
3839     }
3840   }
3841   return true;
3842 }
3843 
3844 void CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
3845                                             MachineInstr *&ExtMI) {
3846   assert(MI.getOpcode() == TargetOpcode::G_PHI);
3847   Register DstReg = ExtMI->getOperand(0).getReg();
3848   LLT ExtTy = MRI.getType(DstReg);
3849 
3850   // Propagate the extension into each incoming register's defining block.
3851   // Use a SetVector here because PHIs can have duplicate edges, and we want
3852   // deterministic iteration order.
3853   SmallSetVector<MachineInstr *, 8> SrcMIs;
3854   SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
3855   for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
3856     auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
3857     if (!SrcMIs.insert(SrcMI))
3858       continue;
3859 
3860     // Build an extend after each src inst.
3861     auto *MBB = SrcMI->getParent();
3862     MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
3863     if (InsertPt != MBB->end() && InsertPt->isPHI())
3864       InsertPt = MBB->getFirstNonPHI();
3865 
3866     Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
3867     Builder.setDebugLoc(MI.getDebugLoc());
3868     auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
3869                                           SrcMI->getOperand(0).getReg());
3870     OldToNewSrcMap[SrcMI] = NewExt;
3871   }
3872 
3873   // Create a new phi with the extended inputs.
3874   Builder.setInstrAndDebugLoc(MI);
3875   auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
3876   NewPhi.addDef(DstReg);
3877   for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
3878     auto &MO = MI.getOperand(SrcIdx);
3879     if (!MO.isReg()) {
3880       NewPhi.addMBB(MO.getMBB());
3881       continue;
3882     }
3883     auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
3884     NewPhi.addUse(NewSrc->getOperand(0).getReg());
3885   }
3886   Builder.insertInstr(NewPhi);
3887   ExtMI->eraseFromParent();
3888 }
3889 
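/// Match a G_EXTRACT_VECTOR_ELT with a constant index whose vector source is
/// a G_BUILD_VECTOR (or G_BUILD_VECTOR_TRUNC). On success, \p Reg is the
/// scalar source feeding the requested lane, so the extract can be replaced
/// by that register (possibly after a trunc).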
3890 bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
3891                                                 Register &Reg) {
3892   assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
3893   // If we have a constant index, look for a G_BUILD_VECTOR source
3894   // and find the source register that the index maps to.
3895   Register SrcVec = MI.getOperand(1).getReg();
3896   LLT SrcTy = MRI.getType(SrcVec);
3897   if (!isLegalOrBeforeLegalizer(
3898           {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
3899     return false;
3900 
3901   auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
3902   if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
3903     return false;
3904 
3905   unsigned VecIdx = Cst->Value.getZExtValue();
3906   MachineInstr *BuildVecMI =
3907       getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI);
3908   if (!BuildVecMI) {
3909     BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI);
3910     if (!BuildVecMI)
3911       return false;
3912     LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg());
3913     if (!isLegalOrBeforeLegalizer(
3914             {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}}))
3915       return false;
3916   }
3917 
3918   EVT Ty(getMVTForLLT(SrcTy));
3919   if (!MRI.hasOneNonDBGUse(SrcVec) &&
3920       !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
3921     return false;
3922 
3923   Reg = BuildVecMI->getOperand(VecIdx + 1).getReg();
3924   return true;
3925 }
3926 
3927 void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
3928                                                 Register &Reg) {
3929   // Check the type of the register, since it may have come from a
3930   // G_BUILD_VECTOR_TRUNC.
3931   LLT ScalarTy = MRI.getType(Reg);
3932   Register DstReg = MI.getOperand(0).getReg();
3933   LLT DstTy = MRI.getType(DstReg);
3934 
3935   Builder.setInstrAndDebugLoc(MI);
3936   if (ScalarTy != DstTy) {
3937     assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
3938     Builder.buildTrunc(DstReg, Reg);
3939     MI.eraseFromParent();
3940     return;
3941   }
3942   replaceSingleDefInstWithReg(MI, Reg);
3943 }
3944 
3945 bool CombinerHelper::matchExtractAllEltsFromBuildVector(
3946     MachineInstr &MI,
3947     SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
3948   assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3949   // This combine tries to find build_vector's which have every source element
3950   // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
3951   // the masked load scalarization is run late in the pipeline. There's already
3952   // a combine for a similar pattern starting from the extract, but that
3953   // doesn't attempt to do it if there are multiple uses of the build_vector,
3954   // which in this case is true. Starting the combine from the build_vector
3955   // feels more natural than trying to find sibling nodes of extracts.
3956   // E.g.
3957   //  %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
3958   //  %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
3959   //  %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
3960   //  %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
3961   //  %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
3962   // ==>
3963   // replace ext{1,2,3,4} with %s{1,2,3,4}
3964 
3965   Register DstReg = MI.getOperand(0).getReg();
3966   LLT DstTy = MRI.getType(DstReg);
3967   unsigned NumElts = DstTy.getNumElements();
3968 
3969   SmallBitVector ExtractedElts(NumElts);
3970   for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
3971                              MRI.use_instr_nodbg_end())) {
3972     if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
3973       return false;
3974     auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
3975     if (!Cst)
3976       return false;
3977     unsigned Idx = Cst.getValue().getZExtValue();
3978     if (Idx >= NumElts)
3979       return false; // Out of range.
3980     ExtractedElts.set(Idx);
3981     SrcDstPairs.emplace_back(
3982         std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
3983   }
3984   // Match if every element was extracted.
3985   return ExtractedElts.all();
3986 }
3987 
3988 void CombinerHelper::applyExtractAllEltsFromBuildVector(
3989     MachineInstr &MI,
3990     SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
3991   assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3992   for (auto &Pair : SrcDstPairs) {
3993     auto *ExtMI = Pair.second;
3994     replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
3995     ExtMI->eraseFromParent();
3996   }
3997   MI.eraseFromParent();
3998 }
3999 
4000 void CombinerHelper::applyBuildFn(
4001     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4002   Builder.setInstrAndDebugLoc(MI);
4003   MatchInfo(Builder);
4004   MI.eraseFromParent();
4005 }
4006 
4007 void CombinerHelper::applyBuildFnNoErase(
4008     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4009   Builder.setInstrAndDebugLoc(MI);
4010   MatchInfo(Builder);
4011 }
4012 
4013 /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
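/// E.g. G_FSHL %x, %x, %amt is equivalent to G_ROTL %x, %amt, and
/// G_FSHR %x, %x, %amt is equivalent to G_ROTR %x, %amt.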
4014 bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
4015   unsigned Opc = MI.getOpcode();
4016   assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4017   Register X = MI.getOperand(1).getReg();
4018   Register Y = MI.getOperand(2).getReg();
4019   if (X != Y)
4020     return false;
4021   unsigned RotateOpc =
4022       Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
4023   return isLegalOrBeforeLegalizer({RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
4024 }
4025 
4026 void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
4027   unsigned Opc = MI.getOpcode();
4028   assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
4029   bool IsFSHL = Opc == TargetOpcode::G_FSHL;
4030   Observer.changingInstr(MI);
4031   MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
4032                                          : TargetOpcode::G_ROTR));
4033   MI.RemoveOperand(2);
4034   Observer.changedInstr(MI);
4035 }
4036 
4037 // Fold (rot x, c) -> (rot x, c % BitSize)
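// E.g. on s32, (rotl x, 37) -> (rotl x, 5).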
4038 bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
4039   assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4040          MI.getOpcode() == TargetOpcode::G_ROTR);
4041   unsigned Bitsize =
4042       MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4043   Register AmtReg = MI.getOperand(2).getReg();
4044   bool OutOfRange = false;
4045   auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
4046     if (auto *CI = dyn_cast<ConstantInt>(C))
4047       OutOfRange |= CI->getValue().uge(Bitsize);
4048     return true;
4049   };
4050   return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
4051 }
4052 
4053 void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
4054   assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
4055          MI.getOpcode() == TargetOpcode::G_ROTR);
4056   unsigned Bitsize =
4057       MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
4058   Builder.setInstrAndDebugLoc(MI);
4059   Register Amt = MI.getOperand(2).getReg();
4060   LLT AmtTy = MRI.getType(Amt);
4061   auto Bits = Builder.buildConstant(AmtTy, Bitsize);
4062   Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
4063   Observer.changingInstr(MI);
4064   MI.getOperand(2).setReg(Amt);
4065   Observer.changedInstr(MI);
4066 }
4067 
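/// Try to evaluate a G_ICMP to a constant using known bits. On success,
/// \p MatchInfo is the target's "true" value (from getICmpTrueVal) if the
/// comparison is provably true, or 0 if it is provably false.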
4068 bool CombinerHelper::matchICmpToTrueFalseKnownBits(MachineInstr &MI,
4069                                                    int64_t &MatchInfo) {
4070   assert(MI.getOpcode() == TargetOpcode::G_ICMP);
4071   auto Pred = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
4072   auto KnownLHS = KB->getKnownBits(MI.getOperand(2).getReg());
4073   auto KnownRHS = KB->getKnownBits(MI.getOperand(3).getReg());
4074   Optional<bool> KnownVal;
4075   switch (Pred) {
4076   default:
4077     llvm_unreachable("Unexpected G_ICMP predicate?");
4078   case CmpInst::ICMP_EQ:
4079     KnownVal = KnownBits::eq(KnownLHS, KnownRHS);
4080     break;
4081   case CmpInst::ICMP_NE:
4082     KnownVal = KnownBits::ne(KnownLHS, KnownRHS);
4083     break;
4084   case CmpInst::ICMP_SGE:
4085     KnownVal = KnownBits::sge(KnownLHS, KnownRHS);
4086     break;
4087   case CmpInst::ICMP_SGT:
4088     KnownVal = KnownBits::sgt(KnownLHS, KnownRHS);
4089     break;
4090   case CmpInst::ICMP_SLE:
4091     KnownVal = KnownBits::sle(KnownLHS, KnownRHS);
4092     break;
4093   case CmpInst::ICMP_SLT:
4094     KnownVal = KnownBits::slt(KnownLHS, KnownRHS);
4095     break;
4096   case CmpInst::ICMP_UGE:
4097     KnownVal = KnownBits::uge(KnownLHS, KnownRHS);
4098     break;
4099   case CmpInst::ICMP_UGT:
4100     KnownVal = KnownBits::ugt(KnownLHS, KnownRHS);
4101     break;
4102   case CmpInst::ICMP_ULE:
4103     KnownVal = KnownBits::ule(KnownLHS, KnownRHS);
4104     break;
4105   case CmpInst::ICMP_ULT:
4106     KnownVal = KnownBits::ult(KnownLHS, KnownRHS);
4107     break;
4108   }
4109   if (!KnownVal)
4110     return false;
4111   MatchInfo =
4112       *KnownVal
4113           ? getICmpTrueVal(getTargetLowering(),
4114                            /*IsVector = */
4115                            MRI.getType(MI.getOperand(0).getReg()).isVector(),
4116                            /* IsFP = */ false)
4117           : 0;
4118   return true;
4119 }
4120 
4121 /// Form a G_SBFX from a G_SEXT_INREG fed by a right shift.
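/// E.g. on s32, (sext_inreg (lshr x, 6), 8) becomes (sbfx x, 6, 8): a
/// sign-extended extract of 8 bits starting at bit 6.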
4122 bool CombinerHelper::matchBitfieldExtractFromSExtInReg(
4123     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
4124   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
4125   Register Dst = MI.getOperand(0).getReg();
4126   Register Src = MI.getOperand(1).getReg();
4127   LLT Ty = MRI.getType(Src);
4128   LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
4129   if (!LI || !LI->isLegalOrCustom({TargetOpcode::G_SBFX, {Ty, ExtractTy}}))
4130     return false;
4131   int64_t Width = MI.getOperand(2).getImm();
4132   Register ShiftSrc;
4133   int64_t ShiftImm;
4134   if (!mi_match(
4135           Src, MRI,
4136           m_OneNonDBGUse(m_any_of(m_GAShr(m_Reg(ShiftSrc), m_ICst(ShiftImm)),
4137                                   m_GLShr(m_Reg(ShiftSrc), m_ICst(ShiftImm))))))
4138     return false;
4139   if (ShiftImm < 0 || ShiftImm + Width > Ty.getScalarSizeInBits())
4140     return false;
4141 
4142   MatchInfo = [=](MachineIRBuilder &B) {
4143     auto Cst1 = B.buildConstant(ExtractTy, ShiftImm);
4144     auto Cst2 = B.buildConstant(ExtractTy, Width);
4145     B.buildSbfx(Dst, ShiftSrc, Cst1, Cst2);
4146   };
4147   return true;
4148 }
4149 
4150 /// Form a G_UBFX from "(a srl b) & mask", where b and mask are constants.
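/// E.g. on s32, (and (lshr x, 4), 0xff) becomes (ubfx x, 4, 8): an unsigned
/// extract of 8 bits starting at bit 4.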
bool CombinerHelper::matchBitfieldExtractFromAnd(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_AND);
  Register Dst = MI.getOperand(0).getReg();
  LLT Ty = MRI.getType(Dst);
  if (!getTargetLowering().isConstantUnsignedBitfieldExtractLegal(
          TargetOpcode::G_UBFX, Ty, Ty))
    return false;

  int64_t AndImm, LSBImm;
  Register ShiftSrc;
  const unsigned Size = Ty.getScalarSizeInBits();
  if (!mi_match(MI.getOperand(0).getReg(), MRI,
                m_GAnd(m_OneNonDBGUse(m_GLShr(m_Reg(ShiftSrc), m_ICst(LSBImm))),
                       m_ICst(AndImm))))
    return false;

  // The mask is a mask of the low bits iff imm & (imm+1) == 0.
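  // (For example 0x00ff: 0xff & 0x100 == 0; but 0x00f0: 0xf0 & 0xf1 != 0.)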
  auto MaybeMask = static_cast<uint64_t>(AndImm);
  if (MaybeMask & (MaybeMask + 1))
    return false;

  // LSB must fit within the register.
  if (static_cast<uint64_t>(LSBImm) >= Size)
    return false;

  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  uint64_t Width = APInt(Size, AndImm).countTrailingOnes();
  MatchInfo = [=](MachineIRBuilder &B) {
    auto WidthCst = B.buildConstant(ExtractTy, Width);
    auto LSBCst = B.buildConstant(ExtractTy, LSBImm);
    B.buildInstr(TargetOpcode::G_UBFX, {Dst}, {ShiftSrc, LSBCst, WidthCst});
  };
  return true;
}

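/// Form a G_SBFX/G_UBFX from "shr (shl x, c1), c2", where c1 and c2 are
/// constants. G_ASHR yields a signed extract (G_SBFX) and G_LSHR an unsigned
/// one (G_UBFX).
///
/// Illustrative example, with the constants shown inline and arbitrary
/// register names:
///   %shl:_(s32) = G_SHL %x:_(s32), 4
///   %dst:_(s32) = G_ASHR %shl, 8
///  -->
///   %dst:_(s32) = G_SBFX %x, 4 (pos), 24 (width)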
bool CombinerHelper::matchBitfieldExtractFromShr(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  const unsigned Opcode = MI.getOpcode();
  assert(Opcode == TargetOpcode::G_ASHR || Opcode == TargetOpcode::G_LSHR);

  const Register Dst = MI.getOperand(0).getReg();

  const unsigned ExtrOpcode = Opcode == TargetOpcode::G_ASHR
                                  ? TargetOpcode::G_SBFX
                                  : TargetOpcode::G_UBFX;

  // Check if the type we would use for the extract is legal.
  LLT Ty = MRI.getType(Dst);
  LLT ExtractTy = getTargetLowering().getPreferredShiftAmountTy(Ty);
  if (!LI || !LI->isLegalOrCustom({ExtrOpcode, {Ty, ExtractTy}}))
    return false;

  Register ShlSrc;
  int64_t ShrAmt;
  int64_t ShlAmt;
  const unsigned Size = Ty.getScalarSizeInBits();

  // Try to match shr (shl x, c1), c2.
  if (!mi_match(Dst, MRI,
                m_BinOp(Opcode,
                        m_OneNonDBGUse(m_GShl(m_Reg(ShlSrc), m_ICst(ShlAmt))),
                        m_ICst(ShrAmt))))
    return false;

  // Make sure that the shift sizes can fit a bitfield extract.
  if (ShlAmt < 0 || ShlAmt > ShrAmt || ShrAmt >= Size)
    return false;

  // Skip this combine if the G_SEXT_INREG combine could handle it.
  if (Opcode == TargetOpcode::G_ASHR && ShlAmt == ShrAmt)
    return false;

  // Calculate start position and width of the extract: bit j of the result
  // equals bit (j + ShrAmt - ShlAmt) of the source for j < Size - ShrAmt,
  // i.e. an extract of width Size - ShrAmt starting at bit ShrAmt - ShlAmt.
  const int64_t Pos = ShrAmt - ShlAmt;
  const int64_t Width = Size - ShrAmt;

  MatchInfo = [=](MachineIRBuilder &B) {
    auto WidthCst = B.buildConstant(ExtractTy, Width);
    auto PosCst = B.buildConstant(ExtractTy, Pos);
    B.buildInstr(ExtrOpcode, {Dst}, {ShlSrc, PosCst, WidthCst});
  };
  return true;
}

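/// Return true if reassociating \p PtrAdd, i.e. rewriting
/// G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) as G_PTR_ADD(BASE, C1+C2), could turn an
/// offset that is currently legal as an addressing mode for some load/store
/// user of the inner G_PTR_ADD into one that is not, defeating a later fold of
/// the offset into the memory access.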
bool CombinerHelper::reassociationCanBreakAddressingModePattern(
    MachineInstr &PtrAdd) {
  assert(PtrAdd.getOpcode() == TargetOpcode::G_PTR_ADD);

  Register Src1Reg = PtrAdd.getOperand(1).getReg();
  MachineInstr *Src1Def = getOpcodeDef(TargetOpcode::G_PTR_ADD, Src1Reg, MRI);
  if (!Src1Def)
    return false;

  Register Src2Reg = PtrAdd.getOperand(2).getReg();

  if (MRI.hasOneNonDBGUse(Src1Reg))
    return false;

  auto C1 = getConstantVRegVal(Src1Def->getOperand(2).getReg(), MRI);
  if (!C1)
    return false;
  auto C2 = getConstantVRegVal(Src2Reg, MRI);
  if (!C2)
    return false;

  const APInt &C1APIntVal = *C1;
  const APInt &C2APIntVal = *C2;
  const int64_t CombinedValue = (C1APIntVal + C2APIntVal).getSExtValue();

  for (auto &UseMI : MRI.use_nodbg_instructions(Src1Reg)) {
    // This combine may end up running before ptrtoint/inttoptr combines
    // manage to eliminate redundant conversions, so try to look through them.
    MachineInstr *ConvUseMI = &UseMI;
    unsigned ConvUseOpc = ConvUseMI->getOpcode();
    while (ConvUseOpc == TargetOpcode::G_INTTOPTR ||
           ConvUseOpc == TargetOpcode::G_PTRTOINT) {
      Register DefReg = ConvUseMI->getOperand(0).getReg();
      if (!MRI.hasOneNonDBGUse(DefReg))
        break;
      ConvUseMI = &*MRI.use_instr_nodbg_begin(DefReg);
      ConvUseOpc = ConvUseMI->getOpcode();
    }
    auto LoadStore = ConvUseOpc == TargetOpcode::G_LOAD ||
                     ConvUseOpc == TargetOpcode::G_STORE;
    if (!LoadStore)
      continue;
    // Is x[offset2] already not a legal addressing mode? If so then
    // reassociating the constants breaks nothing (we test offset2 because
    // that's the one we hope to fold into the load or store).
    TargetLoweringBase::AddrMode AM;
    AM.HasBaseReg = true;
    AM.BaseOffs = C2APIntVal.getSExtValue();
    unsigned AS =
        MRI.getType(ConvUseMI->getOperand(1).getReg()).getAddressSpace();
    Type *AccessTy =
        getTypeForLLT(MRI.getType(ConvUseMI->getOperand(0).getReg()),
                      PtrAdd.getMF()->getFunction().getContext());
    const auto &TLI = *PtrAdd.getMF()->getSubtarget().getTargetLowering();
    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
                                   AccessTy, AS))
      continue;

    // Would x[offset1+offset2] still be a legal addressing mode?
    AM.BaseOffs = CombinedValue;
    if (!TLI.isLegalAddressingMode(PtrAdd.getMF()->getDataLayout(), AM,
                                   AccessTy, AS))
      return true;
  }

  return false;
}

bool CombinerHelper::matchReassocPtrAdd(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
  // We're trying to match a few pointer computation patterns here for
  // re-association opportunities.
  // 1) Isolating a constant operand to be on the RHS, e.g.:
  // G_PTR_ADD(BASE, G_ADD(X, C)) -> G_PTR_ADD(G_PTR_ADD(BASE, X), C)
  //
  // 2) Folding two constants in each sub-tree as long as such folding
  // doesn't break a legal addressing mode.
  // G_PTR_ADD(G_PTR_ADD(BASE, C1), C2) -> G_PTR_ADD(BASE, C1+C2)
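  //
  // Illustrative example for 2), with the constants shown inline and
  // arbitrary register names:
  //   %p:_(p0) = G_PTR_ADD %base, 4
  //   %q:_(p0) = G_PTR_ADD %p, 12
  //  -->
  //   %q:_(p0) = G_PTR_ADD %base, 16
  // provided the combined offset does not break a legal addressing mode for
  // %p's memory users (see reassociationCanBreakAddressingModePattern).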
  Register Src1Reg = MI.getOperand(1).getReg();
  Register Src2Reg = MI.getOperand(2).getReg();
  MachineInstr *LHS = MRI.getVRegDef(Src1Reg);
  MachineInstr *RHS = MRI.getVRegDef(Src2Reg);

  if (LHS->getOpcode() != TargetOpcode::G_PTR_ADD) {
    // Try to match example 1).
    if (RHS->getOpcode() != TargetOpcode::G_ADD)
      return false;
    auto C2 = getConstantVRegVal(RHS->getOperand(2).getReg(), MRI);
    if (!C2)
      return false;

    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      LLT PtrTy = MRI.getType(MI.getOperand(0).getReg());

      auto NewBase =
          B.buildPtrAdd(PtrTy, Src1Reg, RHS->getOperand(1).getReg());
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(NewBase.getReg(0));
      MI.getOperand(2).setReg(RHS->getOperand(2).getReg());
      Observer.changedInstr(MI);
    };
  } else {
    // Try to match example 2).
    Register LHSSrc1 = LHS->getOperand(1).getReg();
    Register LHSSrc2 = LHS->getOperand(2).getReg();
    auto C1 = getConstantVRegVal(LHSSrc2, MRI);
    if (!C1)
      return false;
    auto C2 = getConstantVRegVal(Src2Reg, MRI);
    if (!C2)
      return false;

    MatchInfo = [=, &MI](MachineIRBuilder &B) {
      auto NewCst = B.buildConstant(MRI.getType(Src2Reg), *C1 + *C2);
      Observer.changingInstr(MI);
      MI.getOperand(1).setReg(LHSSrc1);
      MI.getOperand(2).setReg(NewCst.getReg(0));
      Observer.changedInstr(MI);
    };
  }
  return !reassociationCanBreakAddressingModePattern(MI);
}

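/// Constant-fold a binary operation whose two source operands are constants.
/// On success, \p MatchInfo is set to the folded value that should replace
/// \p MI's result.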
bool CombinerHelper::matchConstantFold(MachineInstr &MI, APInt &MatchInfo) {
  Register Op1 = MI.getOperand(1).getReg();
  Register Op2 = MI.getOperand(2).getReg();
  auto MaybeCst = ConstantFoldBinOp(MI.getOpcode(), Op1, Op2, MRI);
  if (!MaybeCst)
    return false;
  MatchInfo = *MaybeCst;
  return true;
}

bool CombinerHelper::matchNarrowBinopFeedingAnd(
    MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
  // Look for a binop feeding into an AND with a mask:
  //
  // %add = G_ADD %lhs, %rhs
  // %and = G_AND %add, 000...11111111
  //
  // Check if it's possible to perform the binop at a narrower width and zext
  // back to the original width like so:
  //
  // %narrow_lhs = G_TRUNC %lhs
  // %narrow_rhs = G_TRUNC %rhs
  // %narrow_add = G_ADD %narrow_lhs, %narrow_rhs
  // %new_add = G_ZEXT %narrow_add
  // %and = G_AND %new_add, 000...11111111
  //
  // This can allow later combines to eliminate the G_AND if it turns out
  // that the mask is irrelevant.
  assert(MI.getOpcode() == TargetOpcode::G_AND);
  Register Dst = MI.getOperand(0).getReg();
  Register AndLHS = MI.getOperand(1).getReg();
  Register AndRHS = MI.getOperand(2).getReg();
  LLT WideTy = MRI.getType(Dst);

  // If the potential binop has more than one use, then it's possible that one
  // of those uses will need its full width.
  if (!WideTy.isScalar() || !MRI.hasOneNonDBGUse(AndLHS))
    return false;

  // Check if the LHS feeding the AND is impacted by the high bits that we're
  // masking out.
  //
  // e.g. for 64-bit x, y:
  //
  // add_64(x, y) & 65535 == zext(add_16(trunc(x), trunc(y))) & 65535
  MachineInstr *LHSInst = getDefIgnoringCopies(AndLHS, MRI);
  if (!LHSInst)
    return false;
  unsigned LHSOpc = LHSInst->getOpcode();
  switch (LHSOpc) {
  default:
    return false;
  case TargetOpcode::G_ADD:
  case TargetOpcode::G_SUB:
  case TargetOpcode::G_MUL:
  case TargetOpcode::G_AND:
  case TargetOpcode::G_OR:
  case TargetOpcode::G_XOR:
    break;
  }

  // Find the mask on the RHS.
  auto Cst = getConstantVRegValWithLookThrough(AndRHS, MRI);
  if (!Cst)
    return false;
  auto Mask = Cst->Value;
  if (!Mask.isMask())
    return false;

  // No point in combining if there's nothing to truncate.
  unsigned NarrowWidth = Mask.countTrailingOnes();
  if (NarrowWidth == WideTy.getSizeInBits())
    return false;
  LLT NarrowTy = LLT::scalar(NarrowWidth);

  // Check if adding the zext + truncates could be harmful.
  auto &MF = *MI.getMF();
  const auto &TLI = getTargetLowering();
  LLVMContext &Ctx = MF.getFunction().getContext();
  auto &DL = MF.getDataLayout();
  if (!TLI.isTruncateFree(WideTy, NarrowTy, DL, Ctx) ||
      !TLI.isZExtFree(NarrowTy, WideTy, DL, Ctx))
    return false;
  if (!isLegalOrBeforeLegalizer({TargetOpcode::G_TRUNC, {NarrowTy, WideTy}}) ||
      !isLegalOrBeforeLegalizer({TargetOpcode::G_ZEXT, {WideTy, NarrowTy}}))
    return false;
  Register BinOpLHS = LHSInst->getOperand(1).getReg();
  Register BinOpRHS = LHSInst->getOperand(2).getReg();
  MatchInfo = [=, &MI](MachineIRBuilder &B) {
    auto NarrowLHS = B.buildTrunc(NarrowTy, BinOpLHS);
    auto NarrowRHS = B.buildTrunc(NarrowTy, BinOpRHS);
    auto NarrowBinOp =
        B.buildInstr(LHSOpc, {NarrowTy}, {NarrowLHS, NarrowRHS});
    auto Ext = B.buildZExt(WideTy, NarrowBinOp);
    Observer.changingInstr(MI);
    MI.getOperand(1).setReg(Ext.getReg(0));
    Observer.changedInstr(MI);
  };
  return true;
}

bool CombinerHelper::tryCombine(MachineInstr &MI) {
  if (tryCombineCopy(MI))
    return true;
  if (tryCombineExtendingLoads(MI))
    return true;
  if (tryCombineIndexedLoadStore(MI))
    return true;
  return false;
}