1 //===-- lib/CodeGen/GlobalISel/GICombinerHelper.cpp -----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "llvm/CodeGen/GlobalISel/CombinerHelper.h"
9 #include "llvm/ADT/SetVector.h"
10 #include "llvm/ADT/SmallBitVector.h"
11 #include "llvm/CodeGen/GlobalISel/Combiner.h"
12 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
13 #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h"
14 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
15 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
16 #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
17 #include "llvm/CodeGen/GlobalISel/Utils.h"
18 #include "llvm/CodeGen/LowLevelType.h"
19 #include "llvm/CodeGen/MachineBasicBlock.h"
20 #include "llvm/CodeGen/MachineDominators.h"
21 #include "llvm/CodeGen/MachineFrameInfo.h"
22 #include "llvm/CodeGen/MachineInstr.h"
23 #include "llvm/CodeGen/MachineMemOperand.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/TargetInstrInfo.h"
26 #include "llvm/CodeGen/TargetLowering.h"
27 #include "llvm/CodeGen/TargetOpcodes.h"
28 #include "llvm/Support/MathExtras.h"
29 #include "llvm/Target/TargetMachine.h"
30 
31 #define DEBUG_TYPE "gi-combiner"
32 
33 using namespace llvm;
34 using namespace MIPatternMatch;
35 
36 // Option to allow testing of the combiner while no targets know about indexed
37 // addressing.
38 static cl::opt<bool>
39     ForceLegalIndexing("force-legal-indexing", cl::Hidden, cl::init(false),
40                        cl::desc("Force all indexed operations to be "
41                                 "legal for the GlobalISel combiner"));
42 
43 CombinerHelper::CombinerHelper(GISelChangeObserver &Observer,
44                                MachineIRBuilder &B, GISelKnownBits *KB,
45                                MachineDominatorTree *MDT,
46                                const LegalizerInfo *LI)
47     : Builder(B), MRI(Builder.getMF().getRegInfo()), Observer(Observer),
48       KB(KB), MDT(MDT), LI(LI) {
49   (void)this->KB;
50 }
51 
52 const TargetLowering &CombinerHelper::getTargetLowering() const {
53   return *Builder.getMF().getSubtarget().getTargetLowering();
54 }
55 
56 /// \returns The little endian in-memory byte position of byte \p I in a
57 /// \p ByteWidth bytes wide type.
58 ///
59 /// E.g. Given a 4-byte type x, x[0] -> byte 0
60 static unsigned littleEndianByteAt(const unsigned ByteWidth, const unsigned I) {
61   assert(I < ByteWidth && "I must be in [0, ByteWidth)");
62   return I;
63 }
64 
65 /// \returns The big endian in-memory byte position of byte \p I in a
66 /// \p ByteWidth bytes wide type.
67 ///
68 /// E.g. Given a 4-byte type x, x[0] -> byte 3
69 static unsigned bigEndianByteAt(const unsigned ByteWidth, const unsigned I) {
70   assert(I < ByteWidth && "I must be in [0, ByteWidth)");
71   return ByteWidth - I - 1;
72 }
73 
74 /// Given a map from byte offsets in memory to indices in a load/store,
75 /// determine if that map corresponds to a little or big endian byte pattern.
76 ///
/// \param MemOffset2Idx maps memory offsets to indices in the load/store.
78 /// \param LowestIdx is the lowest index in \p MemOffset2Idx.
79 ///
80 /// \returns true if the map corresponds to a big endian byte pattern, false
81 /// if it corresponds to a little endian byte pattern, and None otherwise.
82 ///
83 /// E.g. given a 32-bit type x, and x[AddrOffset], the in-memory byte patterns
84 /// are as follows:
85 ///
86 /// AddrOffset   Little endian    Big endian
87 /// 0            0                3
88 /// 1            1                2
89 /// 2            2                1
90 /// 3            3                0
91 static Optional<bool>
92 isBigEndian(const SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
93             int64_t LowestIdx) {
94   // Need at least two byte positions to decide on endianness.
95   unsigned Width = MemOffset2Idx.size();
96   if (Width < 2)
97     return None;
98   bool BigEndian = true, LittleEndian = true;
  for (unsigned MemOffset = 0; MemOffset < Width; ++MemOffset) {
100     auto MemOffsetAndIdx = MemOffset2Idx.find(MemOffset);
101     if (MemOffsetAndIdx == MemOffset2Idx.end())
102       return None;
103     const int64_t Idx = MemOffsetAndIdx->second - LowestIdx;
104     assert(Idx >= 0 && "Expected non-negative byte offset?");
105     LittleEndian &= Idx == littleEndianByteAt(Width, MemOffset);
106     BigEndian &= Idx == bigEndianByteAt(Width, MemOffset);
107     if (!BigEndian && !LittleEndian)
108       return None;
109   }
110 
111   assert((BigEndian != LittleEndian) &&
112          "Pattern cannot be both big and little endian!");
113   return BigEndian;
114 }
115 
116 bool CombinerHelper::isLegalOrBeforeLegalizer(
117     const LegalityQuery &Query) const {
118   return !LI || LI->getAction(Query).Action == LegalizeActions::Legal;
119 }
120 
121 void CombinerHelper::replaceRegWith(MachineRegisterInfo &MRI, Register FromReg,
122                                     Register ToReg) const {
123   Observer.changingAllUsesOfReg(MRI, FromReg);
124 
125   if (MRI.constrainRegAttrs(ToReg, FromReg))
126     MRI.replaceRegWith(FromReg, ToReg);
127   else
128     Builder.buildCopy(ToReg, FromReg);
129 
130   Observer.finishedChangingAllUsesOfReg();
131 }
132 
133 void CombinerHelper::replaceRegOpWith(MachineRegisterInfo &MRI,
134                                       MachineOperand &FromRegOp,
135                                       Register ToReg) const {
136   assert(FromRegOp.getParent() && "Expected an operand in an MI");
137   Observer.changingInstr(*FromRegOp.getParent());
138 
139   FromRegOp.setReg(ToReg);
140 
141   Observer.changedInstr(*FromRegOp.getParent());
142 }
143 
144 bool CombinerHelper::tryCombineCopy(MachineInstr &MI) {
145   if (matchCombineCopy(MI)) {
146     applyCombineCopy(MI);
147     return true;
148   }
149   return false;
150 }
151 bool CombinerHelper::matchCombineCopy(MachineInstr &MI) {
152   if (MI.getOpcode() != TargetOpcode::COPY)
153     return false;
154   Register DstReg = MI.getOperand(0).getReg();
155   Register SrcReg = MI.getOperand(1).getReg();
156   return canReplaceReg(DstReg, SrcReg, MRI);
157 }
158 void CombinerHelper::applyCombineCopy(MachineInstr &MI) {
159   Register DstReg = MI.getOperand(0).getReg();
160   Register SrcReg = MI.getOperand(1).getReg();
161   MI.eraseFromParent();
162   replaceRegWith(MRI, DstReg, SrcReg);
163 }
164 
165 bool CombinerHelper::tryCombineConcatVectors(MachineInstr &MI) {
166   bool IsUndef = false;
167   SmallVector<Register, 4> Ops;
168   if (matchCombineConcatVectors(MI, IsUndef, Ops)) {
169     applyCombineConcatVectors(MI, IsUndef, Ops);
170     return true;
171   }
172   return false;
173 }
174 
175 bool CombinerHelper::matchCombineConcatVectors(MachineInstr &MI, bool &IsUndef,
176                                                SmallVectorImpl<Register> &Ops) {
177   assert(MI.getOpcode() == TargetOpcode::G_CONCAT_VECTORS &&
178          "Invalid instruction");
179   IsUndef = true;
180   MachineInstr *Undef = nullptr;
181 
182   // Walk over all the operands of concat vectors and check if they are
183   // build_vector themselves or undef.
184   // Then collect their operands in Ops.
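  // For example (illustrative generic MIR; register names and types are made
  // up for the sake of the example):
  //   %v0:_(<2 x s32>) = G_BUILD_VECTOR %a, %b
  //   %v1:_(<2 x s32>) = G_BUILD_VECTOR %c, %d
  //   %cat:_(<4 x s32>) = G_CONCAT_VECTORS %v0, %v1
  // collects Ops = {%a, %b, %c, %d} so the apply step can emit a single
  //   %cat:_(<4 x s32>) = G_BUILD_VECTOR %a, %b, %c, %d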
185   for (const MachineOperand &MO : MI.uses()) {
186     Register Reg = MO.getReg();
187     MachineInstr *Def = MRI.getVRegDef(Reg);
188     assert(Def && "Operand not defined");
189     switch (Def->getOpcode()) {
190     case TargetOpcode::G_BUILD_VECTOR:
191       IsUndef = false;
192       // Remember the operands of the build_vector to fold
193       // them into the yet-to-build flattened concat vectors.
194       for (const MachineOperand &BuildVecMO : Def->uses())
195         Ops.push_back(BuildVecMO.getReg());
196       break;
197     case TargetOpcode::G_IMPLICIT_DEF: {
198       LLT OpType = MRI.getType(Reg);
199       // Keep one undef value for all the undef operands.
200       if (!Undef) {
201         Builder.setInsertPt(*MI.getParent(), MI);
202         Undef = Builder.buildUndef(OpType.getScalarType());
203       }
204       assert(MRI.getType(Undef->getOperand(0).getReg()) ==
205                  OpType.getScalarType() &&
206              "All undefs should have the same type");
207       // Break the undef vector in as many scalar elements as needed
208       // for the flattening.
209       for (unsigned EltIdx = 0, EltEnd = OpType.getNumElements();
210            EltIdx != EltEnd; ++EltIdx)
211         Ops.push_back(Undef->getOperand(0).getReg());
212       break;
213     }
214     default:
215       return false;
216     }
217   }
218   return true;
219 }
220 void CombinerHelper::applyCombineConcatVectors(
221     MachineInstr &MI, bool IsUndef, const ArrayRef<Register> Ops) {
  // We determined that the concat_vectors can be flattened.
223   // Generate the flattened build_vector.
224   Register DstReg = MI.getOperand(0).getReg();
225   Builder.setInsertPt(*MI.getParent(), MI);
226   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
227 
  // Note: IsUndef is somewhat redundant. We could have determined it by
  // checking that all Ops are undef.  Alternatively, we could have
  // generated a build_vector of undefs and relied on another combine to
  // clean that up.  For now, given we already gather this information
  // in tryCombineConcatVectors, just save compile time and issue the
  // right thing.
234   if (IsUndef)
235     Builder.buildUndef(NewDstReg);
236   else
237     Builder.buildBuildVector(NewDstReg, Ops);
238   MI.eraseFromParent();
239   replaceRegWith(MRI, DstReg, NewDstReg);
240 }
241 
242 bool CombinerHelper::tryCombineShuffleVector(MachineInstr &MI) {
243   SmallVector<Register, 4> Ops;
244   if (matchCombineShuffleVector(MI, Ops)) {
245     applyCombineShuffleVector(MI, Ops);
246     return true;
247   }
248   return false;
249 }
250 
251 bool CombinerHelper::matchCombineShuffleVector(MachineInstr &MI,
252                                                SmallVectorImpl<Register> &Ops) {
253   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR &&
254          "Invalid instruction kind");
255   LLT DstType = MRI.getType(MI.getOperand(0).getReg());
256   Register Src1 = MI.getOperand(1).getReg();
257   LLT SrcType = MRI.getType(Src1);
  // As bizarre as it may look, a shuffle vector can actually produce a
  // scalar! This is because at the IR level a <1 x ty> shuffle
  // vector is perfectly valid.
261   unsigned DstNumElts = DstType.isVector() ? DstType.getNumElements() : 1;
262   unsigned SrcNumElts = SrcType.isVector() ? SrcType.getNumElements() : 1;
263 
  // If the resulting vector is smaller than the size of the source
  // vectors being concatenated, we won't be able to replace the
  // shuffle vector with a concat_vectors.
  //
  // Note: We may still be able to produce a concat_vectors fed by
  //       extract_vector_elt and so on. It is less clear that would
  //       be better though, so don't bother for now.
  //
  // If the destination is a scalar, the size of the sources doesn't
  // matter. We will lower the shuffle to a plain copy. This will
  // work only if the source and destination have the same size. But
  // that's covered by the next condition.
  //
  // TODO: If the sizes of the source and destination don't match
  //       we could still emit an extract vector element in that case.
279   if (DstNumElts < 2 * SrcNumElts && DstNumElts != 1)
280     return false;
281 
282   // Check that the shuffle mask can be broken evenly between the
283   // different sources.
284   if (DstNumElts % SrcNumElts != 0)
285     return false;
286 
287   // Mask length is a multiple of the source vector length.
288   // Check if the shuffle is some kind of concatenation of the input
289   // vectors.
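  // For example (illustrative MIR; names and types are hypothetical), with
  // two <2 x s32> sources and the mask <0, 1, 2, 3>:
  //   %dst:_(<4 x s32>) = G_SHUFFLE_VECTOR %src1(<2 x s32>), %src2, shufflemask(0, 1, 2, 3)
  // is equivalent to
  //   %dst:_(<4 x s32>) = G_CONCAT_VECTORS %src1(<2 x s32>), %src2
  // and ConcatSrcs below ends up as {0, 1}.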
290   unsigned NumConcat = DstNumElts / SrcNumElts;
291   SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
292   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
293   for (unsigned i = 0; i != DstNumElts; ++i) {
294     int Idx = Mask[i];
295     // Undef value.
296     if (Idx < 0)
297       continue;
298     // Ensure the indices in each SrcType sized piece are sequential and that
299     // the same source is used for the whole piece.
300     if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
301         (ConcatSrcs[i / SrcNumElts] >= 0 &&
302          ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts)))
303       return false;
304     // Remember which source this index came from.
305     ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
306   }
307 
308   // The shuffle is concatenating multiple vectors together.
309   // Collect the different operands for that.
310   Register UndefReg;
311   Register Src2 = MI.getOperand(2).getReg();
312   for (auto Src : ConcatSrcs) {
313     if (Src < 0) {
314       if (!UndefReg) {
315         Builder.setInsertPt(*MI.getParent(), MI);
316         UndefReg = Builder.buildUndef(SrcType).getReg(0);
317       }
318       Ops.push_back(UndefReg);
319     } else if (Src == 0)
320       Ops.push_back(Src1);
321     else
322       Ops.push_back(Src2);
323   }
324   return true;
325 }
326 
327 void CombinerHelper::applyCombineShuffleVector(MachineInstr &MI,
328                                                const ArrayRef<Register> Ops) {
329   Register DstReg = MI.getOperand(0).getReg();
330   Builder.setInsertPt(*MI.getParent(), MI);
331   Register NewDstReg = MRI.cloneVirtualRegister(DstReg);
332 
333   if (Ops.size() == 1)
334     Builder.buildCopy(NewDstReg, Ops[0]);
335   else
336     Builder.buildMerge(NewDstReg, Ops);
337 
338   MI.eraseFromParent();
339   replaceRegWith(MRI, DstReg, NewDstReg);
340 }
341 
342 namespace {
343 
/// Select a preference between two uses. CurrentUse is the current preference
/// while the *ForCandidate arguments describe the candidate under
/// consideration.
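///
/// For example (hypothetical candidates), if a load is used by both
/// %a:_(s32) = G_ANYEXT and %b:_(s64) = G_SEXT, the G_SEXT to s64 wins:
/// defined extensions are preferred over G_ANYEXT, and otherwise the larger
/// type is chosen since truncating back for the smaller uses is usually free.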
346 PreferredTuple ChoosePreferredUse(PreferredTuple &CurrentUse,
347                                   const LLT TyForCandidate,
348                                   unsigned OpcodeForCandidate,
349                                   MachineInstr *MIForCandidate) {
350   if (!CurrentUse.Ty.isValid()) {
351     if (CurrentUse.ExtendOpcode == OpcodeForCandidate ||
352         CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT)
353       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
354     return CurrentUse;
355   }
356 
357   // We permit the extend to hoist through basic blocks but this is only
358   // sensible if the target has extending loads. If you end up lowering back
359   // into a load and extend during the legalizer then the end result is
360   // hoisting the extend up to the load.
361 
362   // Prefer defined extensions to undefined extensions as these are more
363   // likely to reduce the number of instructions.
364   if (OpcodeForCandidate == TargetOpcode::G_ANYEXT &&
365       CurrentUse.ExtendOpcode != TargetOpcode::G_ANYEXT)
366     return CurrentUse;
367   else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ANYEXT &&
368            OpcodeForCandidate != TargetOpcode::G_ANYEXT)
369     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
370 
  // Prefer sign extensions to zero extensions as sign-extensions tend to be
  // more expensive as standalone instructions, so there is more to gain from
  // folding them into the load.
373   if (CurrentUse.Ty == TyForCandidate) {
374     if (CurrentUse.ExtendOpcode == TargetOpcode::G_SEXT &&
375         OpcodeForCandidate == TargetOpcode::G_ZEXT)
376       return CurrentUse;
377     else if (CurrentUse.ExtendOpcode == TargetOpcode::G_ZEXT &&
378              OpcodeForCandidate == TargetOpcode::G_SEXT)
379       return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
380   }
381 
  // This is potentially target specific. We've chosen the largest type
  // because G_TRUNC is usually free. One potential catch with this is that
  // some targets have fewer large registers than small ones, and this
  // choice potentially increases the live-range for the larger value.
387   if (TyForCandidate.getSizeInBits() > CurrentUse.Ty.getSizeInBits()) {
388     return {TyForCandidate, OpcodeForCandidate, MIForCandidate};
389   }
390   return CurrentUse;
391 }
392 
393 /// Find a suitable place to insert some instructions and insert them. This
394 /// function accounts for special cases like inserting before a PHI node.
395 /// The current strategy for inserting before PHI's is to duplicate the
396 /// instructions for each predecessor. However, while that's ok for G_TRUNC
397 /// on most targets since it generally requires no code, other targets/cases may
398 /// want to try harder to find a dominating block.
399 static void InsertInsnsWithoutSideEffectsBeforeUse(
400     MachineIRBuilder &Builder, MachineInstr &DefMI, MachineOperand &UseMO,
401     std::function<void(MachineBasicBlock *, MachineBasicBlock::iterator,
402                        MachineOperand &UseMO)>
403         Inserter) {
404   MachineInstr &UseMI = *UseMO.getParent();
405 
406   MachineBasicBlock *InsertBB = UseMI.getParent();
407 
408   // If the use is a PHI then we want the predecessor block instead.
409   if (UseMI.isPHI()) {
410     MachineOperand *PredBB = std::next(&UseMO);
411     InsertBB = PredBB->getMBB();
412   }
413 
414   // If the block is the same block as the def then we want to insert just after
415   // the def instead of at the start of the block.
416   if (InsertBB == DefMI.getParent()) {
417     MachineBasicBlock::iterator InsertPt = &DefMI;
418     Inserter(InsertBB, std::next(InsertPt), UseMO);
419     return;
420   }
421 
422   // Otherwise we want the start of the BB
423   Inserter(InsertBB, InsertBB->getFirstNonPHI(), UseMO);
424 }
425 } // end anonymous namespace
426 
427 bool CombinerHelper::tryCombineExtendingLoads(MachineInstr &MI) {
428   PreferredTuple Preferred;
429   if (matchCombineExtendingLoads(MI, Preferred)) {
430     applyCombineExtendingLoads(MI, Preferred);
431     return true;
432   }
433   return false;
434 }
435 
436 bool CombinerHelper::matchCombineExtendingLoads(MachineInstr &MI,
437                                                 PreferredTuple &Preferred) {
  // We match the loads and follow the uses to the extend instead of matching
  // the extends and following the def to the load. This is because the load
  // must remain in the same position for correctness (unless we also add code
  // to find a safe place to sink it) whereas the extend is freely movable.
  // It also avoids duplicating the load, which would be incorrect for the
  // volatile case and wasteful in general.
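  // The basic transformation this enables (illustrative MIR; the exact opcode
  // and types depend on the preferred use chosen below):
  //   %v:_(s8) = G_LOAD %ptr :: (load 1)
  //   %e:_(s32) = G_SEXT %v(s8)
  // becomes
  //   %e:_(s32) = G_SEXTLOAD %ptr :: (load 1)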
444 
445   if (MI.getOpcode() != TargetOpcode::G_LOAD &&
446       MI.getOpcode() != TargetOpcode::G_SEXTLOAD &&
447       MI.getOpcode() != TargetOpcode::G_ZEXTLOAD)
448     return false;
449 
450   auto &LoadValue = MI.getOperand(0);
451   assert(LoadValue.isReg() && "Result wasn't a register?");
452 
453   LLT LoadValueTy = MRI.getType(LoadValue.getReg());
454   if (!LoadValueTy.isScalar())
455     return false;
456 
  // Most architectures are going to legalize loads narrower than s8 into at
  // least a 1-byte load, and the MMOs can only describe memory accesses in
  // multiples of bytes. If we try to perform extload combining on those, we
  // can end up with
  // %a(s8) = extload %ptr (load 1 byte from %ptr)
  // ... which is an illegal extload instruction.
462   if (LoadValueTy.getSizeInBits() < 8)
463     return false;
464 
  // Non-power-of-2 types will very likely be legalized into multiple loads.
  // Don't bother trying to match them into extending loads.
467   if (!isPowerOf2_32(LoadValueTy.getSizeInBits()))
468     return false;
469 
  // Find the preferred type aside from the any-extends (unless it's the only
  // one) and non-extending ops. We'll emit an extending load to that type
  // and emit a variant of (extend (trunc X)) for the others according to the
  // relative type sizes. At the same time, pick an extend to use based on the
  // extend involved in the chosen type.
475   unsigned PreferredOpcode = MI.getOpcode() == TargetOpcode::G_LOAD
476                                  ? TargetOpcode::G_ANYEXT
477                                  : MI.getOpcode() == TargetOpcode::G_SEXTLOAD
478                                        ? TargetOpcode::G_SEXT
479                                        : TargetOpcode::G_ZEXT;
480   Preferred = {LLT(), PreferredOpcode, nullptr};
481   for (auto &UseMI : MRI.use_nodbg_instructions(LoadValue.getReg())) {
482     if (UseMI.getOpcode() == TargetOpcode::G_SEXT ||
483         UseMI.getOpcode() == TargetOpcode::G_ZEXT ||
484         (UseMI.getOpcode() == TargetOpcode::G_ANYEXT)) {
485       const auto &MMO = **MI.memoperands_begin();
486       // For atomics, only form anyextending loads.
487       if (MMO.isAtomic() && UseMI.getOpcode() != TargetOpcode::G_ANYEXT)
488         continue;
489       // Check for legality.
490       if (LI) {
491         LegalityQuery::MemDesc MMDesc;
492         MMDesc.SizeInBits = MMO.getSizeInBits();
493         MMDesc.AlignInBits = MMO.getAlign().value() * 8;
494         MMDesc.Ordering = MMO.getOrdering();
495         LLT UseTy = MRI.getType(UseMI.getOperand(0).getReg());
496         LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
497         if (LI->getAction({MI.getOpcode(), {UseTy, SrcTy}, {MMDesc}}).Action !=
498             LegalizeActions::Legal)
499           continue;
500       }
501       Preferred = ChoosePreferredUse(Preferred,
502                                      MRI.getType(UseMI.getOperand(0).getReg()),
503                                      UseMI.getOpcode(), &UseMI);
504     }
505   }
506 
507   // There were no extends
508   if (!Preferred.MI)
509     return false;
  // It should be impossible to choose an extend without selecting a different
  // type since by definition the result of an extend is larger.
512   assert(Preferred.Ty != LoadValueTy && "Extending to same type?");
513 
514   LLVM_DEBUG(dbgs() << "Preferred use is: " << *Preferred.MI);
515   return true;
516 }
517 
518 void CombinerHelper::applyCombineExtendingLoads(MachineInstr &MI,
519                                                 PreferredTuple &Preferred) {
520   // Rewrite the load to the chosen extending load.
521   Register ChosenDstReg = Preferred.MI->getOperand(0).getReg();
522 
523   // Inserter to insert a truncate back to the original type at a given point
524   // with some basic CSE to limit truncate duplication to one per BB.
525   DenseMap<MachineBasicBlock *, MachineInstr *> EmittedInsns;
526   auto InsertTruncAt = [&](MachineBasicBlock *InsertIntoBB,
527                            MachineBasicBlock::iterator InsertBefore,
528                            MachineOperand &UseMO) {
529     MachineInstr *PreviouslyEmitted = EmittedInsns.lookup(InsertIntoBB);
530     if (PreviouslyEmitted) {
531       Observer.changingInstr(*UseMO.getParent());
532       UseMO.setReg(PreviouslyEmitted->getOperand(0).getReg());
533       Observer.changedInstr(*UseMO.getParent());
534       return;
535     }
536 
537     Builder.setInsertPt(*InsertIntoBB, InsertBefore);
538     Register NewDstReg = MRI.cloneVirtualRegister(MI.getOperand(0).getReg());
539     MachineInstr *NewMI = Builder.buildTrunc(NewDstReg, ChosenDstReg);
540     EmittedInsns[InsertIntoBB] = NewMI;
541     replaceRegOpWith(MRI, UseMO, NewDstReg);
542   };
543 
544   Observer.changingInstr(MI);
545   MI.setDesc(
546       Builder.getTII().get(Preferred.ExtendOpcode == TargetOpcode::G_SEXT
547                                ? TargetOpcode::G_SEXTLOAD
548                                : Preferred.ExtendOpcode == TargetOpcode::G_ZEXT
549                                      ? TargetOpcode::G_ZEXTLOAD
550                                      : TargetOpcode::G_LOAD));
551 
552   // Rewrite all the uses to fix up the types.
553   auto &LoadValue = MI.getOperand(0);
554   SmallVector<MachineOperand *, 4> Uses;
555   for (auto &UseMO : MRI.use_operands(LoadValue.getReg()))
556     Uses.push_back(&UseMO);
557 
558   for (auto *UseMO : Uses) {
559     MachineInstr *UseMI = UseMO->getParent();
560 
561     // If the extend is compatible with the preferred extend then we should fix
562     // up the type and extend so that it uses the preferred use.
563     if (UseMI->getOpcode() == Preferred.ExtendOpcode ||
564         UseMI->getOpcode() == TargetOpcode::G_ANYEXT) {
565       Register UseDstReg = UseMI->getOperand(0).getReg();
566       MachineOperand &UseSrcMO = UseMI->getOperand(1);
567       const LLT UseDstTy = MRI.getType(UseDstReg);
568       if (UseDstReg != ChosenDstReg) {
569         if (Preferred.Ty == UseDstTy) {
570           // If the use has the same type as the preferred use, then merge
571           // the vregs and erase the extend. For example:
572           //    %1:_(s8) = G_LOAD ...
573           //    %2:_(s32) = G_SEXT %1(s8)
574           //    %3:_(s32) = G_ANYEXT %1(s8)
575           //    ... = ... %3(s32)
576           // rewrites to:
577           //    %2:_(s32) = G_SEXTLOAD ...
578           //    ... = ... %2(s32)
579           replaceRegWith(MRI, UseDstReg, ChosenDstReg);
580           Observer.erasingInstr(*UseMO->getParent());
581           UseMO->getParent()->eraseFromParent();
582         } else if (Preferred.Ty.getSizeInBits() < UseDstTy.getSizeInBits()) {
583           // If the preferred size is smaller, then keep the extend but extend
584           // from the result of the extending load. For example:
585           //    %1:_(s8) = G_LOAD ...
586           //    %2:_(s32) = G_SEXT %1(s8)
587           //    %3:_(s64) = G_ANYEXT %1(s8)
588           //    ... = ... %3(s64)
          // rewrites to:
590           //    %2:_(s32) = G_SEXTLOAD ...
591           //    %3:_(s64) = G_ANYEXT %2:_(s32)
592           //    ... = ... %3(s64)
593           replaceRegOpWith(MRI, UseSrcMO, ChosenDstReg);
594         } else {
          // If the preferred size is larger, then insert a truncate. For
          // example:
          //    %1:_(s8) = G_LOAD ...
          //    %2:_(s64) = G_SEXT %1(s8)
          //    %3:_(s32) = G_ZEXT %1(s8)
          //    ... = ... %3(s32)
          // rewrites to:
          //    %2:_(s64) = G_SEXTLOAD ...
          //    %4:_(s8) = G_TRUNC %2:_(s64)
          //    %3:_(s32) = G_ZEXT %4:_(s8)
          //    ... = ... %3(s32)
606           InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO,
607                                                  InsertTruncAt);
608         }
609         continue;
610       }
611       // The use is (one of) the uses of the preferred use we chose earlier.
612       // We're going to update the load to def this value later so just erase
613       // the old extend.
614       Observer.erasingInstr(*UseMO->getParent());
615       UseMO->getParent()->eraseFromParent();
616       continue;
617     }
618 
619     // The use isn't an extend. Truncate back to the type we originally loaded.
620     // This is free on many targets.
621     InsertInsnsWithoutSideEffectsBeforeUse(Builder, MI, *UseMO, InsertTruncAt);
622   }
623 
624   MI.getOperand(0).setReg(ChosenDstReg);
625   Observer.changedInstr(MI);
626 }
627 
628 bool CombinerHelper::isPredecessor(const MachineInstr &DefMI,
629                                    const MachineInstr &UseMI) {
630   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
631          "shouldn't consider debug uses");
632   assert(DefMI.getParent() == UseMI.getParent());
633   if (&DefMI == &UseMI)
634     return false;
635   const MachineBasicBlock &MBB = *DefMI.getParent();
636   auto DefOrUse = find_if(MBB, [&DefMI, &UseMI](const MachineInstr &MI) {
637     return &MI == &DefMI || &MI == &UseMI;
638   });
639   if (DefOrUse == MBB.end())
640     llvm_unreachable("Block must contain both DefMI and UseMI!");
641   return &*DefOrUse == &DefMI;
642 }
643 
644 bool CombinerHelper::dominates(const MachineInstr &DefMI,
645                                const MachineInstr &UseMI) {
646   assert(!DefMI.isDebugInstr() && !UseMI.isDebugInstr() &&
647          "shouldn't consider debug uses");
648   if (MDT)
649     return MDT->dominates(&DefMI, &UseMI);
650   else if (DefMI.getParent() != UseMI.getParent())
651     return false;
652 
653   return isPredecessor(DefMI, UseMI);
654 }
655 
656 bool CombinerHelper::matchSextTruncSextLoad(MachineInstr &MI) {
657   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
658   Register SrcReg = MI.getOperand(1).getReg();
659   Register LoadUser = SrcReg;
660 
661   if (MRI.getType(SrcReg).isVector())
662     return false;
663 
664   Register TruncSrc;
665   if (mi_match(SrcReg, MRI, m_GTrunc(m_Reg(TruncSrc))))
666     LoadUser = TruncSrc;
667 
668   uint64_t SizeInBits = MI.getOperand(2).getImm();
669   // If the source is a G_SEXTLOAD from the same bit width, then we don't
670   // need any extend at all, just a truncate.
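  // For example (illustrative MIR):
  //   %ld:_(s32) = G_SEXTLOAD %ptr :: (load 1)
  //   %ext:_(s32) = G_SEXT_INREG %ld(s32), 8
  // The loaded value is already sign-extended from 8 bits, so the apply step
  // replaces %ext with a plain copy of %ld.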
671   if (auto *LoadMI = getOpcodeDef(TargetOpcode::G_SEXTLOAD, LoadUser, MRI)) {
672     const auto &MMO = **LoadMI->memoperands_begin();
673     // If truncating more than the original extended value, abort.
674     if (TruncSrc && MRI.getType(TruncSrc).getSizeInBits() < MMO.getSizeInBits())
675       return false;
676     if (MMO.getSizeInBits() == SizeInBits)
677       return true;
678   }
679   return false;
680 }
681 
682 bool CombinerHelper::applySextTruncSextLoad(MachineInstr &MI) {
683   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
684   Builder.setInstrAndDebugLoc(MI);
685   Builder.buildCopy(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
686   MI.eraseFromParent();
687   return true;
688 }
689 
690 bool CombinerHelper::matchSextInRegOfLoad(
691     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
692   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
693 
694   // Only supports scalars for now.
695   if (MRI.getType(MI.getOperand(0).getReg()).isVector())
696     return false;
697 
698   Register SrcReg = MI.getOperand(1).getReg();
699   MachineInstr *LoadDef = getOpcodeDef(TargetOpcode::G_LOAD, SrcReg, MRI);
700   if (!LoadDef || !MRI.hasOneNonDBGUse(LoadDef->getOperand(0).getReg()))
701     return false;
702 
703   // If the sign extend extends from a narrower width than the load's width,
704   // then we can narrow the load width when we combine to a G_SEXTLOAD.
705   auto &MMO = **LoadDef->memoperands_begin();
706   // Don't do this for non-simple loads.
707   if (MMO.isAtomic() || MMO.isVolatile())
708     return false;
709 
710   // Avoid widening the load at all.
711   unsigned NewSizeBits =
712       std::min((uint64_t)MI.getOperand(2).getImm(), MMO.getSizeInBits());
713 
714   // Don't generate G_SEXTLOADs with a < 1 byte width.
715   if (NewSizeBits < 8)
716     return false;
  // Don't bother creating a non-power-of-2 sextload, it will likely be broken
  // up anyway for most targets.
719   if (!isPowerOf2_32(NewSizeBits))
720     return false;
721   MatchInfo = std::make_tuple(LoadDef->getOperand(0).getReg(), NewSizeBits);
722   return true;
723 }
724 
725 bool CombinerHelper::applySextInRegOfLoad(
726     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
727   assert(MI.getOpcode() == TargetOpcode::G_SEXT_INREG);
728   Register LoadReg;
729   unsigned ScalarSizeBits;
730   std::tie(LoadReg, ScalarSizeBits) = MatchInfo;
731   auto *LoadDef = MRI.getVRegDef(LoadReg);
732   assert(LoadDef && "Expected a load reg");
733 
734   // If we have the following:
735   // %ld = G_LOAD %ptr, (load 2)
736   // %ext = G_SEXT_INREG %ld, 8
737   //    ==>
738   // %ld = G_SEXTLOAD %ptr (load 1)
739 
740   auto &MMO = **LoadDef->memoperands_begin();
741   Builder.setInstrAndDebugLoc(*LoadDef);
742   auto &MF = Builder.getMF();
743   auto PtrInfo = MMO.getPointerInfo();
744   auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, ScalarSizeBits / 8);
745   Builder.buildLoadInstr(TargetOpcode::G_SEXTLOAD, MI.getOperand(0).getReg(),
746                          LoadDef->getOperand(1).getReg(), *NewMMO);
747   MI.eraseFromParent();
748   return true;
749 }
750 
751 bool CombinerHelper::findPostIndexCandidate(MachineInstr &MI, Register &Addr,
752                                             Register &Base, Register &Offset) {
753   auto &MF = *MI.getParent()->getParent();
754   const auto &TLI = *MF.getSubtarget().getTargetLowering();
755 
756 #ifndef NDEBUG
757   unsigned Opcode = MI.getOpcode();
758   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
759          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
760 #endif
761 
762   Base = MI.getOperand(1).getReg();
763   MachineInstr *BaseDef = MRI.getUniqueVRegDef(Base);
764   if (BaseDef && BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX)
765     return false;
766 
767   LLVM_DEBUG(dbgs() << "Searching for post-indexing opportunity for: " << MI);
  // FIXME: The following use traversal needs a bail out for pathological cases.
769   for (auto &Use : MRI.use_nodbg_instructions(Base)) {
770     if (Use.getOpcode() != TargetOpcode::G_PTR_ADD)
771       continue;
772 
773     Offset = Use.getOperand(2).getReg();
774     if (!ForceLegalIndexing &&
775         !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ false, MRI)) {
776       LLVM_DEBUG(dbgs() << "    Ignoring candidate with illegal addrmode: "
777                         << Use);
778       continue;
779     }
780 
781     // Make sure the offset calculation is before the potentially indexed op.
782     // FIXME: we really care about dependency here. The offset calculation might
783     // be movable.
784     MachineInstr *OffsetDef = MRI.getUniqueVRegDef(Offset);
785     if (!OffsetDef || !dominates(*OffsetDef, MI)) {
786       LLVM_DEBUG(dbgs() << "    Ignoring candidate with offset after mem-op: "
787                         << Use);
788       continue;
789     }
790 
791     // FIXME: check whether all uses of Base are load/store with foldable
792     // addressing modes. If so, using the normal addr-modes is better than
793     // forming an indexed one.
794 
795     bool MemOpDominatesAddrUses = true;
796     for (auto &PtrAddUse :
797          MRI.use_nodbg_instructions(Use.getOperand(0).getReg())) {
798       if (!dominates(MI, PtrAddUse)) {
799         MemOpDominatesAddrUses = false;
800         break;
801       }
802     }
803 
804     if (!MemOpDominatesAddrUses) {
805       LLVM_DEBUG(
806           dbgs() << "    Ignoring candidate as memop does not dominate uses: "
807                  << Use);
808       continue;
809     }
810 
811     LLVM_DEBUG(dbgs() << "    Found match: " << Use);
812     Addr = Use.getOperand(0).getReg();
813     return true;
814   }
815 
816   return false;
817 }
818 
819 bool CombinerHelper::findPreIndexCandidate(MachineInstr &MI, Register &Addr,
820                                            Register &Base, Register &Offset) {
821   auto &MF = *MI.getParent()->getParent();
822   const auto &TLI = *MF.getSubtarget().getTargetLowering();
823 
824 #ifndef NDEBUG
825   unsigned Opcode = MI.getOpcode();
826   assert(Opcode == TargetOpcode::G_LOAD || Opcode == TargetOpcode::G_SEXTLOAD ||
827          Opcode == TargetOpcode::G_ZEXTLOAD || Opcode == TargetOpcode::G_STORE);
828 #endif
829 
830   Addr = MI.getOperand(1).getReg();
831   MachineInstr *AddrDef = getOpcodeDef(TargetOpcode::G_PTR_ADD, Addr, MRI);
832   if (!AddrDef || MRI.hasOneNonDBGUse(Addr))
833     return false;
834 
835   Base = AddrDef->getOperand(1).getReg();
836   Offset = AddrDef->getOperand(2).getReg();
837 
838   LLVM_DEBUG(dbgs() << "Found potential pre-indexed load_store: " << MI);
839 
840   if (!ForceLegalIndexing &&
841       !TLI.isIndexingLegal(MI, Base, Offset, /*IsPre*/ true, MRI)) {
842     LLVM_DEBUG(dbgs() << "    Skipping, not legal for target");
843     return false;
844   }
845 
846   MachineInstr *BaseDef = getDefIgnoringCopies(Base, MRI);
847   if (BaseDef->getOpcode() == TargetOpcode::G_FRAME_INDEX) {
848     LLVM_DEBUG(dbgs() << "    Skipping, frame index would need copy anyway.");
849     return false;
850   }
851 
852   if (MI.getOpcode() == TargetOpcode::G_STORE) {
853     // Would require a copy.
854     if (Base == MI.getOperand(0).getReg()) {
855       LLVM_DEBUG(dbgs() << "    Skipping, storing base so need copy anyway.");
856       return false;
857     }
858 
859     // We're expecting one use of Addr in MI, but it could also be the
860     // value stored, which isn't actually dominated by the instruction.
861     if (MI.getOperand(0).getReg() == Addr) {
862       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses");
863       return false;
864     }
865   }
866 
867   // FIXME: check whether all uses of the base pointer are constant PtrAdds.
868   // That might allow us to end base's liveness here by adjusting the constant.
869 
870   for (auto &UseMI : MRI.use_nodbg_instructions(Addr)) {
871     if (!dominates(MI, UseMI)) {
872       LLVM_DEBUG(dbgs() << "    Skipping, does not dominate all addr uses.");
873       return false;
874     }
875   }
876 
877   return true;
878 }
879 
880 bool CombinerHelper::tryCombineIndexedLoadStore(MachineInstr &MI) {
881   IndexedLoadStoreMatchInfo MatchInfo;
882   if (matchCombineIndexedLoadStore(MI, MatchInfo)) {
883     applyCombineIndexedLoadStore(MI, MatchInfo);
884     return true;
885   }
886   return false;
887 }
888 
889 bool CombinerHelper::matchCombineIndexedLoadStore(MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
890   unsigned Opcode = MI.getOpcode();
891   if (Opcode != TargetOpcode::G_LOAD && Opcode != TargetOpcode::G_SEXTLOAD &&
892       Opcode != TargetOpcode::G_ZEXTLOAD && Opcode != TargetOpcode::G_STORE)
893     return false;
894 
895   // For now, no targets actually support these opcodes so don't waste time
896   // running these unless we're forced to for testing.
897   if (!ForceLegalIndexing)
898     return false;
899 
900   MatchInfo.IsPre = findPreIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
901                                           MatchInfo.Offset);
902   if (!MatchInfo.IsPre &&
903       !findPostIndexCandidate(MI, MatchInfo.Addr, MatchInfo.Base,
904                               MatchInfo.Offset))
905     return false;
906 
907   return true;
908 }
909 
910 void CombinerHelper::applyCombineIndexedLoadStore(
911     MachineInstr &MI, IndexedLoadStoreMatchInfo &MatchInfo) {
912   MachineInstr &AddrDef = *MRI.getUniqueVRegDef(MatchInfo.Addr);
913   MachineIRBuilder MIRBuilder(MI);
914   unsigned Opcode = MI.getOpcode();
915   bool IsStore = Opcode == TargetOpcode::G_STORE;
916   unsigned NewOpcode;
917   switch (Opcode) {
918   case TargetOpcode::G_LOAD:
919     NewOpcode = TargetOpcode::G_INDEXED_LOAD;
920     break;
921   case TargetOpcode::G_SEXTLOAD:
922     NewOpcode = TargetOpcode::G_INDEXED_SEXTLOAD;
923     break;
924   case TargetOpcode::G_ZEXTLOAD:
925     NewOpcode = TargetOpcode::G_INDEXED_ZEXTLOAD;
926     break;
927   case TargetOpcode::G_STORE:
928     NewOpcode = TargetOpcode::G_INDEXED_STORE;
929     break;
930   default:
931     llvm_unreachable("Unknown load/store opcode");
932   }
933 
934   auto MIB = MIRBuilder.buildInstr(NewOpcode);
935   if (IsStore) {
936     MIB.addDef(MatchInfo.Addr);
937     MIB.addUse(MI.getOperand(0).getReg());
938   } else {
939     MIB.addDef(MI.getOperand(0).getReg());
940     MIB.addDef(MatchInfo.Addr);
941   }
942 
943   MIB.addUse(MatchInfo.Base);
944   MIB.addUse(MatchInfo.Offset);
945   MIB.addImm(MatchInfo.IsPre);
946   MI.eraseFromParent();
947   AddrDef.eraseFromParent();
948 
949   LLVM_DEBUG(dbgs() << "    Combinined to indexed operation");
950 }
951 
952 bool CombinerHelper::matchCombineDivRem(MachineInstr &MI,
953                                         MachineInstr *&OtherMI) {
954   unsigned Opcode = MI.getOpcode();
955   bool IsDiv, IsSigned;
956 
957   switch (Opcode) {
958   default:
959     llvm_unreachable("Unexpected opcode!");
960   case TargetOpcode::G_SDIV:
961   case TargetOpcode::G_UDIV: {
962     IsDiv = true;
963     IsSigned = Opcode == TargetOpcode::G_SDIV;
964     break;
965   }
966   case TargetOpcode::G_SREM:
967   case TargetOpcode::G_UREM: {
968     IsDiv = false;
969     IsSigned = Opcode == TargetOpcode::G_SREM;
970     break;
971   }
972   }
973 
974   Register Src1 = MI.getOperand(1).getReg();
975   unsigned DivOpcode, RemOpcode, DivremOpcode;
976   if (IsSigned) {
977     DivOpcode = TargetOpcode::G_SDIV;
978     RemOpcode = TargetOpcode::G_SREM;
979     DivremOpcode = TargetOpcode::G_SDIVREM;
980   } else {
981     DivOpcode = TargetOpcode::G_UDIV;
982     RemOpcode = TargetOpcode::G_UREM;
983     DivremOpcode = TargetOpcode::G_UDIVREM;
984   }
985 
986   if (!isLegalOrBeforeLegalizer({DivremOpcode, {MRI.getType(Src1)}}))
987     return false;
988 
989   // Combine:
990   //   %div:_ = G_[SU]DIV %src1:_, %src2:_
991   //   %rem:_ = G_[SU]REM %src1:_, %src2:_
992   // into:
993   //  %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
994 
995   // Combine:
996   //   %rem:_ = G_[SU]REM %src1:_, %src2:_
997   //   %div:_ = G_[SU]DIV %src1:_, %src2:_
998   // into:
999   //  %div:_, %rem:_ = G_[SU]DIVREM %src1:_, %src2:_
1000 
1001   for (auto &UseMI : MRI.use_nodbg_instructions(Src1)) {
1002     if (MI.getParent() == UseMI.getParent() &&
1003         ((IsDiv && UseMI.getOpcode() == RemOpcode) ||
1004          (!IsDiv && UseMI.getOpcode() == DivOpcode)) &&
1005         matchEqualDefs(MI.getOperand(2), UseMI.getOperand(2))) {
1006       OtherMI = &UseMI;
1007       return true;
1008     }
1009   }
1010 
1011   return false;
1012 }
1013 
1014 void CombinerHelper::applyCombineDivRem(MachineInstr &MI,
1015                                         MachineInstr *&OtherMI) {
1016   unsigned Opcode = MI.getOpcode();
1017   assert(OtherMI && "OtherMI shouldn't be empty.");
1018 
1019   Register DestDivReg, DestRemReg;
1020   if (Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_UDIV) {
1021     DestDivReg = MI.getOperand(0).getReg();
1022     DestRemReg = OtherMI->getOperand(0).getReg();
1023   } else {
1024     DestDivReg = OtherMI->getOperand(0).getReg();
1025     DestRemReg = MI.getOperand(0).getReg();
1026   }
1027 
1028   bool IsSigned =
1029       Opcode == TargetOpcode::G_SDIV || Opcode == TargetOpcode::G_SREM;
1030   Builder.setInstrAndDebugLoc(MI);
1031   Builder.buildInstr(IsSigned ? TargetOpcode::G_SDIVREM
1032                               : TargetOpcode::G_UDIVREM,
1033                      {DestDivReg, DestRemReg},
1034                      {MI.getOperand(1).getReg(), MI.getOperand(2).getReg()});
1035   MI.eraseFromParent();
1036   OtherMI->eraseFromParent();
1037 }
1038 
1039 bool CombinerHelper::matchOptBrCondByInvertingCond(MachineInstr &MI,
1040                                                    MachineInstr *&BrCond) {
1041   assert(MI.getOpcode() == TargetOpcode::G_BR);
1042 
1043   // Try to match the following:
1044   // bb1:
1045   //   G_BRCOND %c1, %bb2
1046   //   G_BR %bb3
1047   // bb2:
1048   // ...
1049   // bb3:
1050 
  // The above pattern does not have a fall through to the successor bb2,
  // always resulting in a branch no matter which path is taken. Here we try
  // to find and replace that pattern with a conditional branch to bb3 and a
  // fallthrough to bb2. This is generally better for branch predictors.
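  //
  // After the apply step this becomes (illustrative MIR; the constant is
  // whatever getICmpTrueVal returns for the target):
  // bb1:
  //   %inv:_(s1) = G_XOR %c1, %true
  //   G_BRCOND %inv, %bb3
  //   G_BR %bb2
  // where the unconditional branch now targets the layout successor bb2 and
  // can later be treated as a fallthrough.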
1055 
1056   MachineBasicBlock *MBB = MI.getParent();
1057   MachineBasicBlock::iterator BrIt(MI);
1058   if (BrIt == MBB->begin())
1059     return false;
1060   assert(std::next(BrIt) == MBB->end() && "expected G_BR to be a terminator");
1061 
1062   BrCond = &*std::prev(BrIt);
1063   if (BrCond->getOpcode() != TargetOpcode::G_BRCOND)
1064     return false;
1065 
1066   // Check that the next block is the conditional branch target. Also make sure
1067   // that it isn't the same as the G_BR's target (otherwise, this will loop.)
1068   MachineBasicBlock *BrCondTarget = BrCond->getOperand(1).getMBB();
1069   return BrCondTarget != MI.getOperand(0).getMBB() &&
1070          MBB->isLayoutSuccessor(BrCondTarget);
1071 }
1072 
1073 void CombinerHelper::applyOptBrCondByInvertingCond(MachineInstr &MI,
1074                                                    MachineInstr *&BrCond) {
1075   MachineBasicBlock *BrTarget = MI.getOperand(0).getMBB();
1076   Builder.setInstrAndDebugLoc(*BrCond);
1077   LLT Ty = MRI.getType(BrCond->getOperand(0).getReg());
1078   // FIXME: Does int/fp matter for this? If so, we might need to restrict
1079   // this to i1 only since we might not know for sure what kind of
1080   // compare generated the condition value.
1081   auto True = Builder.buildConstant(
1082       Ty, getICmpTrueVal(getTargetLowering(), false, false));
1083   auto Xor = Builder.buildXor(Ty, BrCond->getOperand(0), True);
1084 
1085   auto *FallthroughBB = BrCond->getOperand(1).getMBB();
1086   Observer.changingInstr(MI);
1087   MI.getOperand(0).setMBB(FallthroughBB);
1088   Observer.changedInstr(MI);
1089 
1090   // Change the conditional branch to use the inverted condition and
1091   // new target block.
1092   Observer.changingInstr(*BrCond);
1093   BrCond->getOperand(0).setReg(Xor.getReg(0));
1094   BrCond->getOperand(1).setMBB(BrTarget);
1095   Observer.changedInstr(*BrCond);
1096 }
1097 
1098 static bool shouldLowerMemFuncForSize(const MachineFunction &MF) {
1099   // On Darwin, -Os means optimize for size without hurting performance, so
1100   // only really optimize for size when -Oz (MinSize) is used.
1101   if (MF.getTarget().getTargetTriple().isOSDarwin())
1102     return MF.getFunction().hasMinSize();
1103   return MF.getFunction().hasOptSize();
1104 }
1105 
1106 // Returns a list of types to use for memory op lowering in MemOps. A partial
1107 // port of findOptimalMemOpLowering in TargetLowering.
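// For example (hypothetical target and sizes), an 11-byte memcpy whose chosen
// type here is s32 might be broken into MemOps = {s32, s32, s32} with the
// final access overlapping the previous one, or {s32, s32, s16, s8} if
// overlapping unaligned accesses are not considered fast.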
1108 static bool findGISelOptimalMemOpLowering(std::vector<LLT> &MemOps,
1109                                           unsigned Limit, const MemOp &Op,
1110                                           unsigned DstAS, unsigned SrcAS,
1111                                           const AttributeList &FuncAttributes,
1112                                           const TargetLowering &TLI) {
1113   if (Op.isMemcpyWithFixedDstAlign() && Op.getSrcAlign() < Op.getDstAlign())
1114     return false;
1115 
1116   LLT Ty = TLI.getOptimalMemOpLLT(Op, FuncAttributes);
1117 
1118   if (Ty == LLT()) {
    // Use the largest scalar type whose alignment constraints are satisfied.
    // We only need to check DstAlign here as SrcAlign is always greater than
    // or equal to DstAlign (or zero).
1122     Ty = LLT::scalar(64);
1123     if (Op.isFixedDstAlign())
1124       while (Op.getDstAlign() < Ty.getSizeInBytes() &&
1125              !TLI.allowsMisalignedMemoryAccesses(Ty, DstAS, Op.getDstAlign()))
1126         Ty = LLT::scalar(Ty.getSizeInBytes());
1127     assert(Ty.getSizeInBits() > 0 && "Could not find valid type");
1128     // FIXME: check for the largest legal type we can load/store to.
1129   }
1130 
1131   unsigned NumMemOps = 0;
1132   uint64_t Size = Op.size();
1133   while (Size) {
1134     unsigned TySize = Ty.getSizeInBytes();
1135     while (TySize > Size) {
      // For now, only use non-vector loads/stores for the left-over pieces.
1137       LLT NewTy = Ty;
1138       // FIXME: check for mem op safety and legality of the types. Not all of
1139       // SDAGisms map cleanly to GISel concepts.
1140       if (NewTy.isVector())
1141         NewTy = NewTy.getSizeInBits() > 64 ? LLT::scalar(64) : LLT::scalar(32);
1142       NewTy = LLT::scalar(PowerOf2Floor(NewTy.getSizeInBits() - 1));
1143       unsigned NewTySize = NewTy.getSizeInBytes();
1144       assert(NewTySize > 0 && "Could not find appropriate type");
1145 
1146       // If the new LLT cannot cover all of the remaining bits, then consider
1147       // issuing a (or a pair of) unaligned and overlapping load / store.
1148       bool Fast;
      // Need to get a VT equivalent for allowsMisalignedMemoryAccesses().
1150       MVT VT = getMVTForLLT(Ty);
1151       if (NumMemOps && Op.allowOverlap() && NewTySize < Size &&
1152           TLI.allowsMisalignedMemoryAccesses(
1153               VT, DstAS, Op.isFixedDstAlign() ? Op.getDstAlign() : Align(1),
1154               MachineMemOperand::MONone, &Fast) &&
1155           Fast)
1156         TySize = Size;
1157       else {
1158         Ty = NewTy;
1159         TySize = NewTySize;
1160       }
1161     }
1162 
1163     if (++NumMemOps > Limit)
1164       return false;
1165 
1166     MemOps.push_back(Ty);
1167     Size -= TySize;
1168   }
1169 
1170   return true;
1171 }
1172 
1173 static Type *getTypeForLLT(LLT Ty, LLVMContext &C) {
1174   if (Ty.isVector())
1175     return FixedVectorType::get(IntegerType::get(C, Ty.getScalarSizeInBits()),
1176                                 Ty.getNumElements());
1177   return IntegerType::get(C, Ty.getSizeInBits());
1178 }
1179 
1180 // Get a vectorized representation of the memset value operand, GISel edition.
1181 static Register getMemsetValue(Register Val, LLT Ty, MachineIRBuilder &MIB) {
1182   MachineRegisterInfo &MRI = *MIB.getMRI();
1183   unsigned NumBits = Ty.getScalarSizeInBits();
1184   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1185   if (!Ty.isVector() && ValVRegAndVal) {
1186     APInt Scalar = ValVRegAndVal->Value.truncOrSelf(8);
1187     APInt SplatVal = APInt::getSplat(NumBits, Scalar);
1188     return MIB.buildConstant(Ty, SplatVal).getReg(0);
1189   }
1190 
1191   // Extend the byte value to the larger type, and then multiply by a magic
1192   // value 0x010101... in order to replicate it across every byte.
1193   // Unless it's zero, in which case just emit a larger G_CONSTANT 0.
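  // For example (illustrative), with an s32 store type and a non-constant
  // byte value of 0xAB, this computes zext(0xAB) * 0x01010101 = 0xABABABAB.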
1194   if (ValVRegAndVal && ValVRegAndVal->Value == 0) {
1195     return MIB.buildConstant(Ty, 0).getReg(0);
1196   }
1197 
1198   LLT ExtType = Ty.getScalarType();
1199   auto ZExt = MIB.buildZExtOrTrunc(ExtType, Val);
1200   if (NumBits > 8) {
1201     APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01));
1202     auto MagicMI = MIB.buildConstant(ExtType, Magic);
1203     Val = MIB.buildMul(ExtType, ZExt, MagicMI).getReg(0);
1204   }
1205 
1206   // For vector types create a G_BUILD_VECTOR.
1207   if (Ty.isVector())
1208     Val = MIB.buildSplatVector(Ty, Val).getReg(0);
1209 
1210   return Val;
1211 }
1212 
1213 bool CombinerHelper::optimizeMemset(MachineInstr &MI, Register Dst,
1214                                     Register Val, unsigned KnownLen,
1215                                     Align Alignment, bool IsVolatile) {
1216   auto &MF = *MI.getParent()->getParent();
1217   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1218   auto &DL = MF.getDataLayout();
1219   LLVMContext &C = MF.getFunction().getContext();
1220 
1221   assert(KnownLen != 0 && "Have a zero length memset length!");
1222 
1223   bool DstAlignCanChange = false;
1224   MachineFrameInfo &MFI = MF.getFrameInfo();
1225   bool OptSize = shouldLowerMemFuncForSize(MF);
1226 
1227   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1228   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1229     DstAlignCanChange = true;
1230 
1231   unsigned Limit = TLI.getMaxStoresPerMemset(OptSize);
1232   std::vector<LLT> MemOps;
1233 
1234   const auto &DstMMO = **MI.memoperands_begin();
1235   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1236 
1237   auto ValVRegAndVal = getConstantVRegValWithLookThrough(Val, MRI);
1238   bool IsZeroVal = ValVRegAndVal && ValVRegAndVal->Value == 0;
1239 
1240   if (!findGISelOptimalMemOpLowering(MemOps, Limit,
1241                                      MemOp::Set(KnownLen, DstAlignCanChange,
1242                                                 Alignment,
1243                                                 /*IsZeroMemset=*/IsZeroVal,
1244                                                 /*IsVolatile=*/IsVolatile),
1245                                      DstPtrInfo.getAddrSpace(), ~0u,
1246                                      MF.getFunction().getAttributes(), TLI))
1247     return false;
1248 
1249   if (DstAlignCanChange) {
1250     // Get an estimate of the type from the LLT.
1251     Type *IRTy = getTypeForLLT(MemOps[0], C);
1252     Align NewAlign = DL.getABITypeAlign(IRTy);
1253     if (NewAlign > Alignment) {
1254       Alignment = NewAlign;
1255       unsigned FI = FIDef->getOperand(1).getIndex();
1256       // Give the stack frame object a larger alignment if needed.
1257       if (MFI.getObjectAlign(FI) < Alignment)
1258         MFI.setObjectAlignment(FI, Alignment);
1259     }
1260   }
1261 
1262   MachineIRBuilder MIB(MI);
1263   // Find the largest store and generate the bit pattern for it.
1264   LLT LargestTy = MemOps[0];
1265   for (unsigned i = 1; i < MemOps.size(); i++)
1266     if (MemOps[i].getSizeInBits() > LargestTy.getSizeInBits())
1267       LargestTy = MemOps[i];
1268 
1269   // The memset stored value is always defined as an s8, so in order to make it
1270   // work with larger store types we need to repeat the bit pattern across the
1271   // wider type.
1272   Register MemSetValue = getMemsetValue(Val, LargestTy, MIB);
1273 
1274   if (!MemSetValue)
1275     return false;
1276 
1277   // Generate the stores. For each store type in the list, we generate the
1278   // matching store of that type to the destination address.
1279   LLT PtrTy = MRI.getType(Dst);
1280   unsigned DstOff = 0;
1281   unsigned Size = KnownLen;
1282   for (unsigned I = 0; I < MemOps.size(); I++) {
1283     LLT Ty = MemOps[I];
1284     unsigned TySize = Ty.getSizeInBytes();
1285     if (TySize > Size) {
1286       // Issuing an unaligned load / store pair that overlaps with the previous
1287       // pair. Adjust the offset accordingly.
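      // For example (illustrative), a 7-byte memset lowered as two s32 stores
      // places the second store at offset 3 so that it covers bytes 3-6.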
1288       assert(I == MemOps.size() - 1 && I != 0);
1289       DstOff -= TySize - Size;
1290     }
1291 
    // If this store is smaller than the largest store, see whether we can get
    // the smaller value for free with a truncate.
1294     Register Value = MemSetValue;
1295     if (Ty.getSizeInBits() < LargestTy.getSizeInBits()) {
1296       MVT VT = getMVTForLLT(Ty);
1297       MVT LargestVT = getMVTForLLT(LargestTy);
1298       if (!LargestTy.isVector() && !Ty.isVector() &&
1299           TLI.isTruncateFree(LargestVT, VT))
1300         Value = MIB.buildTrunc(Ty, MemSetValue).getReg(0);
1301       else
1302         Value = getMemsetValue(Val, Ty, MIB);
1303       if (!Value)
1304         return false;
1305     }
1306 
1307     auto *StoreMMO =
1308         MF.getMachineMemOperand(&DstMMO, DstOff, Ty.getSizeInBytes());
1309 
1310     Register Ptr = Dst;
1311     if (DstOff != 0) {
1312       auto Offset =
1313           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), DstOff);
1314       Ptr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1315     }
1316 
1317     MIB.buildStore(Value, Ptr, *StoreMMO);
1318     DstOff += Ty.getSizeInBytes();
1319     Size -= TySize;
1320   }
1321 
1322   MI.eraseFromParent();
1323   return true;
1324 }
1325 
1326 bool CombinerHelper::optimizeMemcpy(MachineInstr &MI, Register Dst,
1327                                     Register Src, unsigned KnownLen,
1328                                     Align DstAlign, Align SrcAlign,
1329                                     bool IsVolatile) {
1330   auto &MF = *MI.getParent()->getParent();
1331   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1332   auto &DL = MF.getDataLayout();
1333   LLVMContext &C = MF.getFunction().getContext();
1334 
1335   assert(KnownLen != 0 && "Have a zero length memcpy length!");
1336 
1337   bool DstAlignCanChange = false;
1338   MachineFrameInfo &MFI = MF.getFrameInfo();
1339   bool OptSize = shouldLowerMemFuncForSize(MF);
1340   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1341 
1342   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1343   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1344     DstAlignCanChange = true;
1345 
1346   // FIXME: infer better src pointer alignment like SelectionDAG does here.
1347   // FIXME: also use the equivalent of isMemSrcFromConstant and alwaysinlining
1348   // if the memcpy is in a tail call position.
1349 
1350   unsigned Limit = TLI.getMaxStoresPerMemcpy(OptSize);
1351   std::vector<LLT> MemOps;
1352 
1353   const auto &DstMMO = **MI.memoperands_begin();
1354   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1355   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1356   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1357 
1358   if (!findGISelOptimalMemOpLowering(
1359           MemOps, Limit,
1360           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1361                       IsVolatile),
1362           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1363           MF.getFunction().getAttributes(), TLI))
1364     return false;
1365 
1366   if (DstAlignCanChange) {
1367     // Get an estimate of the type from the LLT.
1368     Type *IRTy = getTypeForLLT(MemOps[0], C);
1369     Align NewAlign = DL.getABITypeAlign(IRTy);
1370 
1371     // Don't promote to an alignment that would require dynamic stack
1372     // realignment.
1373     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1374     if (!TRI->hasStackRealignment(MF))
1375       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1376         NewAlign = NewAlign / 2;
1377 
1378     if (NewAlign > Alignment) {
1379       Alignment = NewAlign;
1380       unsigned FI = FIDef->getOperand(1).getIndex();
1381       // Give the stack frame object a larger alignment if needed.
1382       if (MFI.getObjectAlign(FI) < Alignment)
1383         MFI.setObjectAlignment(FI, Alignment);
1384     }
1385   }
1386 
1387   LLVM_DEBUG(dbgs() << "Inlining memcpy: " << MI << " into loads & stores\n");
1388 
1389   MachineIRBuilder MIB(MI);
  // Now we need to emit a load/store pair for each of the types we've
  // collected. I.e. for each type, generate a load from the source pointer of
  // that type width, and then generate a corresponding store of the loaded
  // value to the destination buffer. This can result in a sequence of loads
  // and stores of mixed types, depending on what the target specifies as good
  // types to use.
1395   unsigned CurrOffset = 0;
1396   LLT PtrTy = MRI.getType(Src);
1397   unsigned Size = KnownLen;
1398   for (auto CopyTy : MemOps) {
    // Issuing an unaligned load / store pair that overlaps with the previous
    // pair. Adjust the offset accordingly.
1401     if (CopyTy.getSizeInBytes() > Size)
1402       CurrOffset -= CopyTy.getSizeInBytes() - Size;
1403 
1404     // Construct MMOs for the accesses.
1405     auto *LoadMMO =
1406         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1407     auto *StoreMMO =
1408         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1409 
1410     // Create the load.
1411     Register LoadPtr = Src;
1412     Register Offset;
1413     if (CurrOffset != 0) {
1414       Offset = MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset)
1415                    .getReg(0);
1416       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1417     }
1418     auto LdVal = MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO);
1419 
1420     // Create the store.
1421     Register StorePtr =
1422         CurrOffset == 0 ? Dst : MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1423     MIB.buildStore(LdVal, StorePtr, *StoreMMO);
1424     CurrOffset += CopyTy.getSizeInBytes();
1425     Size -= CopyTy.getSizeInBytes();
1426   }
1427 
1428   MI.eraseFromParent();
1429   return true;
1430 }
1431 
1432 bool CombinerHelper::optimizeMemmove(MachineInstr &MI, Register Dst,
1433                                      Register Src, unsigned KnownLen,
1434                                      Align DstAlign, Align SrcAlign,
1435                                      bool IsVolatile) {
1436   auto &MF = *MI.getParent()->getParent();
1437   const auto &TLI = *MF.getSubtarget().getTargetLowering();
1438   auto &DL = MF.getDataLayout();
1439   LLVMContext &C = MF.getFunction().getContext();
1440 
1441   assert(KnownLen != 0 && "Have a zero length memmove length!");
1442 
1443   bool DstAlignCanChange = false;
1444   MachineFrameInfo &MFI = MF.getFrameInfo();
1445   bool OptSize = shouldLowerMemFuncForSize(MF);
1446   Align Alignment = commonAlignment(DstAlign, SrcAlign);
1447 
1448   MachineInstr *FIDef = getOpcodeDef(TargetOpcode::G_FRAME_INDEX, Dst, MRI);
1449   if (FIDef && !MFI.isFixedObjectIndex(FIDef->getOperand(1).getIndex()))
1450     DstAlignCanChange = true;
1451 
1452   unsigned Limit = TLI.getMaxStoresPerMemmove(OptSize);
1453   std::vector<LLT> MemOps;
1454 
1455   const auto &DstMMO = **MI.memoperands_begin();
1456   const auto &SrcMMO = **std::next(MI.memoperands_begin());
1457   MachinePointerInfo DstPtrInfo = DstMMO.getPointerInfo();
1458   MachinePointerInfo SrcPtrInfo = SrcMMO.getPointerInfo();
1459 
  // FIXME: SelectionDAG always passes false for 'AllowOverlap', apparently due
  // to a bug in its findOptimalMemOpLowering implementation. For now do the
  // same thing here.
1463   if (!findGISelOptimalMemOpLowering(
1464           MemOps, Limit,
1465           MemOp::Copy(KnownLen, DstAlignCanChange, Alignment, SrcAlign,
1466                       /*IsVolatile*/ true),
1467           DstPtrInfo.getAddrSpace(), SrcPtrInfo.getAddrSpace(),
1468           MF.getFunction().getAttributes(), TLI))
1469     return false;
1470 
1471   if (DstAlignCanChange) {
1472     // Get an estimate of the type from the LLT.
1473     Type *IRTy = getTypeForLLT(MemOps[0], C);
1474     Align NewAlign = DL.getABITypeAlign(IRTy);
1475 
1476     // Don't promote to an alignment that would require dynamic stack
1477     // realignment.
1478     const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
1479     if (!TRI->hasStackRealignment(MF))
1480       while (NewAlign > Alignment && DL.exceedsNaturalStackAlignment(NewAlign))
1481         NewAlign = NewAlign / 2;
1482 
1483     if (NewAlign > Alignment) {
1484       Alignment = NewAlign;
1485       unsigned FI = FIDef->getOperand(1).getIndex();
1486       // Give the stack frame object a larger alignment if needed.
1487       if (MFI.getObjectAlign(FI) < Alignment)
1488         MFI.setObjectAlignment(FI, Alignment);
1489     }
1490   }
1491 
1492   LLVM_DEBUG(dbgs() << "Inlining memmove: " << MI << " into loads & stores\n");
1493 
1494   MachineIRBuilder MIB(MI);
  // Memmove requires that we perform all of the loads before issuing any of
  // the stores. Apart from that, this loop is doing essentially the same
  // thing as the memcpy codegen function.
1498   unsigned CurrOffset = 0;
1499   LLT PtrTy = MRI.getType(Src);
1500   SmallVector<Register, 16> LoadVals;
1501   for (auto CopyTy : MemOps) {
1502     // Construct MMO for the load.
1503     auto *LoadMMO =
1504         MF.getMachineMemOperand(&SrcMMO, CurrOffset, CopyTy.getSizeInBytes());
1505 
1506     // Create the load.
1507     Register LoadPtr = Src;
1508     if (CurrOffset != 0) {
1509       auto Offset =
1510           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1511       LoadPtr = MIB.buildPtrAdd(PtrTy, Src, Offset).getReg(0);
1512     }
1513     LoadVals.push_back(MIB.buildLoad(CopyTy, LoadPtr, *LoadMMO).getReg(0));
1514     CurrOffset += CopyTy.getSizeInBytes();
1515   }
1516 
1517   CurrOffset = 0;
1518   for (unsigned I = 0; I < MemOps.size(); ++I) {
1519     LLT CopyTy = MemOps[I];
1520     // Now store the values loaded.
1521     auto *StoreMMO =
1522         MF.getMachineMemOperand(&DstMMO, CurrOffset, CopyTy.getSizeInBytes());
1523 
1524     Register StorePtr = Dst;
1525     if (CurrOffset != 0) {
1526       auto Offset =
1527           MIB.buildConstant(LLT::scalar(PtrTy.getSizeInBits()), CurrOffset);
1528       StorePtr = MIB.buildPtrAdd(PtrTy, Dst, Offset).getReg(0);
1529     }
1530     MIB.buildStore(LoadVals[I], StorePtr, *StoreMMO);
1531     CurrOffset += CopyTy.getSizeInBytes();
1532   }
1533   MI.eraseFromParent();
1534   return true;
1535 }
1536 
1537 bool CombinerHelper::tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen) {
1538   const unsigned Opc = MI.getOpcode();
  // This combine is fairly complex, so it's not written with a separate
  // matcher function.
  assert((Opc == TargetOpcode::G_MEMCPY || Opc == TargetOpcode::G_MEMMOVE ||
          Opc == TargetOpcode::G_MEMSET) && "Expected memcpy-like instruction");
1543 
1544   auto MMOIt = MI.memoperands_begin();
1545   const MachineMemOperand *MemOp = *MMOIt;
1546   bool IsVolatile = MemOp->isVolatile();
1547   // Don't try to optimize volatile.
1548   if (IsVolatile)
1549     return false;
1550 
1551   Align DstAlign = MemOp->getBaseAlign();
1552   Align SrcAlign;
1553   Register Dst = MI.getOperand(0).getReg();
1554   Register Src = MI.getOperand(1).getReg();
1555   Register Len = MI.getOperand(2).getReg();
1556 
1557   if (Opc != TargetOpcode::G_MEMSET) {
1558     assert(MMOIt != MI.memoperands_end() && "Expected a second MMO on MI");
1559     MemOp = *(++MMOIt);
1560     SrcAlign = MemOp->getBaseAlign();
1561   }
1562 
  // See if this is a constant-length copy.
1564   auto LenVRegAndVal = getConstantVRegValWithLookThrough(Len, MRI);
1565   if (!LenVRegAndVal)
1566     return false; // Leave it to the legalizer to lower it to a libcall.
1567   unsigned KnownLen = LenVRegAndVal->Value.getZExtValue();
1568 
1569   if (KnownLen == 0) {
1570     MI.eraseFromParent();
1571     return true;
1572   }
1573 
1574   if (MaxLen && KnownLen > MaxLen)
1575     return false;
1576 
  if (Opc == TargetOpcode::G_MEMCPY)
    return optimizeMemcpy(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                          IsVolatile);
  if (Opc == TargetOpcode::G_MEMMOVE)
    return optimizeMemmove(MI, Dst, Src, KnownLen, DstAlign, SrcAlign,
                           IsVolatile);
1581   if (Opc == TargetOpcode::G_MEMSET)
1582     return optimizeMemset(MI, Dst, Src, KnownLen, DstAlign, IsVolatile);
1583   return false;
1584 }
1585 
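/// Try to constant fold a unary floating-point operation \p Opcode on the
/// G_FCONSTANT feeding \p Op. \returns the folded value, or None if \p Op is
/// not defined by a floating-point constant.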
1586 static Optional<APFloat> constantFoldFpUnary(unsigned Opcode, LLT DstTy,
1587                                              const Register Op,
1588                                              const MachineRegisterInfo &MRI) {
1589   const ConstantFP *MaybeCst = getConstantFPVRegVal(Op, MRI);
1590   if (!MaybeCst)
1591     return None;
1592 
1593   APFloat V = MaybeCst->getValueAPF();
1594   switch (Opcode) {
1595   default:
1596     llvm_unreachable("Unexpected opcode!");
1597   case TargetOpcode::G_FNEG: {
1598     V.changeSign();
1599     return V;
1600   }
1601   case TargetOpcode::G_FABS: {
1602     V.clearSign();
1603     return V;
1604   }
1605   case TargetOpcode::G_FPTRUNC:
1606     break;
1607   case TargetOpcode::G_FSQRT: {
1608     bool Unused;
1609     V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
1610     V = APFloat(sqrt(V.convertToDouble()));
1611     break;
1612   }
1613   case TargetOpcode::G_FLOG2: {
1614     bool Unused;
1615     V.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven, &Unused);
1616     V = APFloat(log2(V.convertToDouble()));
1617     break;
1618   }
1619   }
  // Convert `APFloat` to the appropriate IEEE type depending on `DstTy`.
  // Otherwise, `buildFConstant` will assert on a size mismatch. Only
  // `G_FPTRUNC`, `G_FSQRT`, and `G_FLOG2` reach here.
1623   bool Unused;
1624   V.convert(getFltSemanticForLLT(DstTy), APFloat::rmNearestTiesToEven, &Unused);
1625   return V;
1626 }
1627 
1628 bool CombinerHelper::matchCombineConstantFoldFpUnary(MachineInstr &MI,
1629                                                      Optional<APFloat> &Cst) {
1630   Register DstReg = MI.getOperand(0).getReg();
1631   Register SrcReg = MI.getOperand(1).getReg();
1632   LLT DstTy = MRI.getType(DstReg);
1633   Cst = constantFoldFpUnary(MI.getOpcode(), DstTy, SrcReg, MRI);
1634   return Cst.hasValue();
1635 }
1636 
1637 bool CombinerHelper::applyCombineConstantFoldFpUnary(MachineInstr &MI,
1638                                                      Optional<APFloat> &Cst) {
1639   assert(Cst.hasValue() && "Optional is unexpectedly empty!");
1640   Builder.setInstrAndDebugLoc(MI);
1641   MachineFunction &MF = Builder.getMF();
1642   auto *FPVal = ConstantFP::get(MF.getFunction().getContext(), *Cst);
1643   Register DstReg = MI.getOperand(0).getReg();
1644   Builder.buildFConstant(DstReg, *FPVal);
1645   MI.eraseFromParent();
1646   return true;
1647 }
1648 
1649 bool CombinerHelper::matchPtrAddImmedChain(MachineInstr &MI,
1650                                            PtrAddChain &MatchInfo) {
1651   // We're trying to match the following pattern:
1652   //   %t1 = G_PTR_ADD %base, G_CONSTANT imm1
1653   //   %root = G_PTR_ADD %t1, G_CONSTANT imm2
1654   // -->
1655   //   %root = G_PTR_ADD %base, G_CONSTANT (imm1 + imm2)
1656 
1657   if (MI.getOpcode() != TargetOpcode::G_PTR_ADD)
1658     return false;
1659 
1660   Register Add2 = MI.getOperand(1).getReg();
1661   Register Imm1 = MI.getOperand(2).getReg();
1662   auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1663   if (!MaybeImmVal)
1664     return false;
1665 
1666   MachineInstr *Add2Def = MRI.getUniqueVRegDef(Add2);
1667   if (!Add2Def || Add2Def->getOpcode() != TargetOpcode::G_PTR_ADD)
1668     return false;
1669 
1670   Register Base = Add2Def->getOperand(1).getReg();
1671   Register Imm2 = Add2Def->getOperand(2).getReg();
1672   auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1673   if (!MaybeImm2Val)
1674     return false;
1675 
1676   // Pass the combined immediate to the apply function.
1677   MatchInfo.Imm = (MaybeImmVal->Value + MaybeImm2Val->Value).getSExtValue();
1678   MatchInfo.Base = Base;
1679   return true;
1680 }
1681 
1682 bool CombinerHelper::applyPtrAddImmedChain(MachineInstr &MI,
1683                                            PtrAddChain &MatchInfo) {
1684   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected G_PTR_ADD");
1685   MachineIRBuilder MIB(MI);
1686   LLT OffsetTy = MRI.getType(MI.getOperand(2).getReg());
1687   auto NewOffset = MIB.buildConstant(OffsetTy, MatchInfo.Imm);
1688   Observer.changingInstr(MI);
1689   MI.getOperand(1).setReg(MatchInfo.Base);
1690   MI.getOperand(2).setReg(NewOffset.getReg(0));
1691   Observer.changedInstr(MI);
1692   return true;
1693 }
1694 
1695 bool CombinerHelper::matchShiftImmedChain(MachineInstr &MI,
1696                                           RegisterImmPair &MatchInfo) {
1697   // We're trying to match the following pattern with any of
1698   // G_SHL/G_ASHR/G_LSHR/G_SSHLSAT/G_USHLSAT shift instructions:
1699   //   %t1 = SHIFT %base, G_CONSTANT imm1
1700   //   %root = SHIFT %t1, G_CONSTANT imm2
1701   // -->
1702   //   %root = SHIFT %base, G_CONSTANT (imm1 + imm2)
1703 
1704   unsigned Opcode = MI.getOpcode();
1705   assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1706           Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1707           Opcode == TargetOpcode::G_USHLSAT) &&
1708          "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1709 
1710   Register Shl2 = MI.getOperand(1).getReg();
1711   Register Imm1 = MI.getOperand(2).getReg();
1712   auto MaybeImmVal = getConstantVRegValWithLookThrough(Imm1, MRI);
1713   if (!MaybeImmVal)
1714     return false;
1715 
1716   MachineInstr *Shl2Def = MRI.getUniqueVRegDef(Shl2);
1717   if (Shl2Def->getOpcode() != Opcode)
1718     return false;
1719 
1720   Register Base = Shl2Def->getOperand(1).getReg();
1721   Register Imm2 = Shl2Def->getOperand(2).getReg();
1722   auto MaybeImm2Val = getConstantVRegValWithLookThrough(Imm2, MRI);
1723   if (!MaybeImm2Val)
1724     return false;
1725 
1726   // Pass the combined immediate to the apply function.
1727   MatchInfo.Imm =
1728       (MaybeImmVal->Value.getSExtValue() + MaybeImm2Val->Value).getSExtValue();
1729   MatchInfo.Reg = Base;
1730 
1731   // There is no simple replacement for a saturating unsigned left shift that
1732   // exceeds the scalar size.
1733   if (Opcode == TargetOpcode::G_USHLSAT &&
1734       MatchInfo.Imm >= MRI.getType(Shl2).getScalarSizeInBits())
1735     return false;
1736 
1737   return true;
1738 }
1739 
1740 bool CombinerHelper::applyShiftImmedChain(MachineInstr &MI,
1741                                           RegisterImmPair &MatchInfo) {
1742   unsigned Opcode = MI.getOpcode();
1743   assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1744           Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_SSHLSAT ||
1745           Opcode == TargetOpcode::G_USHLSAT) &&
1746          "Expected G_SHL, G_ASHR, G_LSHR, G_SSHLSAT or G_USHLSAT");
1747 
1748   Builder.setInstrAndDebugLoc(MI);
1749   LLT Ty = MRI.getType(MI.getOperand(1).getReg());
1750   unsigned const ScalarSizeInBits = Ty.getScalarSizeInBits();
1751   auto Imm = MatchInfo.Imm;
1752 
1753   if (Imm >= ScalarSizeInBits) {
1754     // Any logical shift that exceeds scalar size will produce zero.
1755     if (Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_LSHR) {
1756       Builder.buildConstant(MI.getOperand(0), 0);
1757       MI.eraseFromParent();
1758       return true;
1759     }
1760     // Arithmetic shift and saturating signed left shift have no effect beyond
1761     // scalar size.
1762     Imm = ScalarSizeInBits - 1;
1763   }
1764 
1765   LLT ImmTy = MRI.getType(MI.getOperand(2).getReg());
1766   Register NewImm = Builder.buildConstant(ImmTy, Imm).getReg(0);
1767   Observer.changingInstr(MI);
1768   MI.getOperand(1).setReg(MatchInfo.Reg);
1769   MI.getOperand(2).setReg(NewImm);
1770   Observer.changedInstr(MI);
1771   return true;
1772 }
1773 
1774 bool CombinerHelper::matchShiftOfShiftedLogic(MachineInstr &MI,
1775                                               ShiftOfShiftedLogic &MatchInfo) {
1776   // We're trying to match the following pattern with any of
1777   // G_SHL/G_ASHR/G_LSHR/G_USHLSAT/G_SSHLSAT shift instructions in combination
1778   // with any of G_AND/G_OR/G_XOR logic instructions.
1779   //   %t1 = SHIFT %X, G_CONSTANT C0
1780   //   %t2 = LOGIC %t1, %Y
1781   //   %root = SHIFT %t2, G_CONSTANT C1
1782   // -->
1783   //   %t3 = SHIFT %X, G_CONSTANT (C0+C1)
1784   //   %t4 = SHIFT %Y, G_CONSTANT C1
1785   //   %root = LOGIC %t3, %t4
1786   unsigned ShiftOpcode = MI.getOpcode();
1787   assert((ShiftOpcode == TargetOpcode::G_SHL ||
1788           ShiftOpcode == TargetOpcode::G_ASHR ||
1789           ShiftOpcode == TargetOpcode::G_LSHR ||
1790           ShiftOpcode == TargetOpcode::G_USHLSAT ||
1791           ShiftOpcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT or G_SSHLSAT");
1793 
1794   // Match a one-use bitwise logic op.
1795   Register LogicDest = MI.getOperand(1).getReg();
1796   if (!MRI.hasOneNonDBGUse(LogicDest))
1797     return false;
1798 
1799   MachineInstr *LogicMI = MRI.getUniqueVRegDef(LogicDest);
1800   unsigned LogicOpcode = LogicMI->getOpcode();
1801   if (LogicOpcode != TargetOpcode::G_AND && LogicOpcode != TargetOpcode::G_OR &&
1802       LogicOpcode != TargetOpcode::G_XOR)
1803     return false;
1804 
1805   // Find a matching one-use shift by constant.
1806   const Register C1 = MI.getOperand(2).getReg();
1807   auto MaybeImmVal = getConstantVRegValWithLookThrough(C1, MRI);
1808   if (!MaybeImmVal)
1809     return false;
1810 
1811   const uint64_t C1Val = MaybeImmVal->Value.getZExtValue();
1812 
1813   auto matchFirstShift = [&](const MachineInstr *MI, uint64_t &ShiftVal) {
1814     // Shift should match previous one and should be a one-use.
1815     if (MI->getOpcode() != ShiftOpcode ||
1816         !MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
1817       return false;
1818 
1819     // Must be a constant.
1820     auto MaybeImmVal =
1821         getConstantVRegValWithLookThrough(MI->getOperand(2).getReg(), MRI);
1822     if (!MaybeImmVal)
1823       return false;
1824 
1825     ShiftVal = MaybeImmVal->Value.getSExtValue();
1826     return true;
1827   };
1828 
1829   // Logic ops are commutative, so check each operand for a match.
1830   Register LogicMIReg1 = LogicMI->getOperand(1).getReg();
1831   MachineInstr *LogicMIOp1 = MRI.getUniqueVRegDef(LogicMIReg1);
1832   Register LogicMIReg2 = LogicMI->getOperand(2).getReg();
1833   MachineInstr *LogicMIOp2 = MRI.getUniqueVRegDef(LogicMIReg2);
1834   uint64_t C0Val;
1835 
1836   if (matchFirstShift(LogicMIOp1, C0Val)) {
1837     MatchInfo.LogicNonShiftReg = LogicMIReg2;
1838     MatchInfo.Shift2 = LogicMIOp1;
1839   } else if (matchFirstShift(LogicMIOp2, C0Val)) {
1840     MatchInfo.LogicNonShiftReg = LogicMIReg1;
1841     MatchInfo.Shift2 = LogicMIOp2;
1842   } else
1843     return false;
1844 
1845   MatchInfo.ValSum = C0Val + C1Val;
1846 
1847   // The fold is not valid if the sum of the shift values exceeds bitwidth.
1848   if (MatchInfo.ValSum >= MRI.getType(LogicDest).getScalarSizeInBits())
1849     return false;
1850 
1851   MatchInfo.Logic = LogicMI;
1852   return true;
1853 }
1854 
1855 bool CombinerHelper::applyShiftOfShiftedLogic(MachineInstr &MI,
1856                                               ShiftOfShiftedLogic &MatchInfo) {
1857   unsigned Opcode = MI.getOpcode();
1858   assert((Opcode == TargetOpcode::G_SHL || Opcode == TargetOpcode::G_ASHR ||
1859           Opcode == TargetOpcode::G_LSHR || Opcode == TargetOpcode::G_USHLSAT ||
1860           Opcode == TargetOpcode::G_SSHLSAT) &&
         "Expected G_SHL, G_ASHR, G_LSHR, G_USHLSAT or G_SSHLSAT");
1862 
1863   LLT ShlType = MRI.getType(MI.getOperand(2).getReg());
1864   LLT DestType = MRI.getType(MI.getOperand(0).getReg());
1865   Builder.setInstrAndDebugLoc(MI);
1866 
1867   Register Const = Builder.buildConstant(ShlType, MatchInfo.ValSum).getReg(0);
1868 
1869   Register Shift1Base = MatchInfo.Shift2->getOperand(1).getReg();
1870   Register Shift1 =
1871       Builder.buildInstr(Opcode, {DestType}, {Shift1Base, Const}).getReg(0);
1872 
1873   Register Shift2Const = MI.getOperand(2).getReg();
1874   Register Shift2 = Builder
1875                         .buildInstr(Opcode, {DestType},
1876                                     {MatchInfo.LogicNonShiftReg, Shift2Const})
1877                         .getReg(0);
1878 
1879   Register Dest = MI.getOperand(0).getReg();
1880   Builder.buildInstr(MatchInfo.Logic->getOpcode(), {Dest}, {Shift1, Shift2});
1881 
  // These were one-use, so it's safe to remove them.
1883   MatchInfo.Shift2->eraseFromParent();
1884   MatchInfo.Logic->eraseFromParent();
1885 
1886   MI.eraseFromParent();
1887   return true;
1888 }
1889 
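// Transform a multiplication by a power-of-two constant into a left shift:
//   %dst = G_MUL %x, G_CONSTANT (1 << N)
// -->
//   %dst = G_SHL %x, G_CONSTANT N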
1890 bool CombinerHelper::matchCombineMulToShl(MachineInstr &MI,
1891                                           unsigned &ShiftVal) {
1892   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1893   auto MaybeImmVal =
1894       getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
1895   if (!MaybeImmVal)
1896     return false;
1897 
1898   ShiftVal = MaybeImmVal->Value.exactLogBase2();
1899   return (static_cast<int32_t>(ShiftVal) != -1);
1900 }
1901 
1902 bool CombinerHelper::applyCombineMulToShl(MachineInstr &MI,
1903                                           unsigned &ShiftVal) {
1904   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
1905   MachineIRBuilder MIB(MI);
1906   LLT ShiftTy = MRI.getType(MI.getOperand(0).getReg());
1907   auto ShiftCst = MIB.buildConstant(ShiftTy, ShiftVal);
1908   Observer.changingInstr(MI);
1909   MI.setDesc(MIB.getTII().get(TargetOpcode::G_SHL));
1910   MI.getOperand(2).setReg(ShiftCst.getReg(0));
1911   Observer.changedInstr(MI);
1912   return true;
1913 }
1914 
1915 // shl ([sza]ext x), y => zext (shl x, y), if shift does not overflow source
1916 bool CombinerHelper::matchCombineShlOfExtend(MachineInstr &MI,
1917                                              RegisterImmPair &MatchData) {
1918   assert(MI.getOpcode() == TargetOpcode::G_SHL && KB);
1919 
1920   Register LHS = MI.getOperand(1).getReg();
1921 
1922   Register ExtSrc;
1923   if (!mi_match(LHS, MRI, m_GAnyExt(m_Reg(ExtSrc))) &&
1924       !mi_match(LHS, MRI, m_GZExt(m_Reg(ExtSrc))) &&
1925       !mi_match(LHS, MRI, m_GSExt(m_Reg(ExtSrc))))
1926     return false;
1927 
1928   // TODO: Should handle vector splat.
1929   Register RHS = MI.getOperand(2).getReg();
1930   auto MaybeShiftAmtVal = getConstantVRegValWithLookThrough(RHS, MRI);
1931   if (!MaybeShiftAmtVal)
1932     return false;
1933 
1934   if (LI) {
1935     LLT SrcTy = MRI.getType(ExtSrc);
1936 
    // We only really care about the legality of the shifted value. We can
    // pick any type for the constant shift amount, so ask the target what to
    // use. Otherwise we would have to guess and hope it is reported as legal.
1940     LLT ShiftAmtTy = getTargetLowering().getPreferredShiftAmountTy(SrcTy);
1941     if (!isLegalOrBeforeLegalizer({TargetOpcode::G_SHL, {SrcTy, ShiftAmtTy}}))
1942       return false;
1943   }
1944 
1945   int64_t ShiftAmt = MaybeShiftAmtVal->Value.getSExtValue();
1946   MatchData.Reg = ExtSrc;
1947   MatchData.Imm = ShiftAmt;
1948 
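  // getKnownZeroes() returns a mask of the bits known to be zero, so counting
  // its leading ones gives the number of known leading zero bits in ExtSrc.
  // The shift is only folded if it cannot move any possibly-set bits out of
  // the narrow source type.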
1949   unsigned MinLeadingZeros = KB->getKnownZeroes(ExtSrc).countLeadingOnes();
1950   return MinLeadingZeros >= ShiftAmt;
1951 }
1952 
1953 bool CombinerHelper::applyCombineShlOfExtend(MachineInstr &MI,
1954                                              const RegisterImmPair &MatchData) {
1955   Register ExtSrcReg = MatchData.Reg;
1956   int64_t ShiftAmtVal = MatchData.Imm;
1957 
1958   LLT ExtSrcTy = MRI.getType(ExtSrcReg);
1959   Builder.setInstrAndDebugLoc(MI);
1960   auto ShiftAmt = Builder.buildConstant(ExtSrcTy, ShiftAmtVal);
1961   auto NarrowShift =
1962       Builder.buildShl(ExtSrcTy, ExtSrcReg, ShiftAmt, MI.getFlags());
1963   Builder.buildZExt(MI.getOperand(0), NarrowShift);
1964   MI.eraseFromParent();
1965   return true;
1966 }
1967 
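/// Look through a chain of G_BITCAST instructions and return the ultimate
/// non-bitcast source register of \p Reg.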
1968 static Register peekThroughBitcast(Register Reg,
1969                                    const MachineRegisterInfo &MRI) {
1970   while (mi_match(Reg, MRI, m_GBitcast(m_Reg(Reg))))
1971     ;
1972 
1973   return Reg;
1974 }
1975 
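// Transform an unmerge of a G_MERGE_VALUES/G_BUILD_VECTOR/G_CONCAT_VECTORS
// (possibly looking through a G_BITCAST) back into its original source
// registers, reusing them directly when the types match and casting otherwise.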
1976 bool CombinerHelper::matchCombineUnmergeMergeToPlainValues(
1977     MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
1978   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
1979          "Expected an unmerge");
1980   Register SrcReg =
1981       peekThroughBitcast(MI.getOperand(MI.getNumOperands() - 1).getReg(), MRI);
1982 
1983   MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
1984   if (SrcInstr->getOpcode() != TargetOpcode::G_MERGE_VALUES &&
1985       SrcInstr->getOpcode() != TargetOpcode::G_BUILD_VECTOR &&
1986       SrcInstr->getOpcode() != TargetOpcode::G_CONCAT_VECTORS)
1987     return false;
1988 
1989   // Check the source type of the merge.
1990   LLT SrcMergeTy = MRI.getType(SrcInstr->getOperand(1).getReg());
1991   LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
1992   bool SameSize = Dst0Ty.getSizeInBits() == SrcMergeTy.getSizeInBits();
1993   if (SrcMergeTy != Dst0Ty && !SameSize)
1994     return false;
1995   // They are the same now (modulo a bitcast).
1996   // We can collect all the src registers.
1997   for (unsigned Idx = 1, EndIdx = SrcInstr->getNumOperands(); Idx != EndIdx;
1998        ++Idx)
1999     Operands.push_back(SrcInstr->getOperand(Idx).getReg());
2000   return true;
2001 }
2002 
2003 bool CombinerHelper::applyCombineUnmergeMergeToPlainValues(
2004     MachineInstr &MI, SmallVectorImpl<Register> &Operands) {
2005   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2006          "Expected an unmerge");
2007   assert((MI.getNumOperands() - 1 == Operands.size()) &&
2008          "Not enough operands to replace all defs");
2009   unsigned NumElems = MI.getNumOperands() - 1;
2010 
2011   LLT SrcTy = MRI.getType(Operands[0]);
2012   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2013   bool CanReuseInputDirectly = DstTy == SrcTy;
2014   Builder.setInstrAndDebugLoc(MI);
2015   for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2016     Register DstReg = MI.getOperand(Idx).getReg();
2017     Register SrcReg = Operands[Idx];
2018     if (CanReuseInputDirectly)
2019       replaceRegWith(MRI, DstReg, SrcReg);
2020     else
2021       Builder.buildCast(DstReg, SrcReg);
2022   }
2023   MI.eraseFromParent();
2024   return true;
2025 }
2026 
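// Transform an unmerge of a G_CONSTANT or G_FCONSTANT into one narrow constant
// per definition, taking the chunks from the least significant bits upwards.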
2027 bool CombinerHelper::matchCombineUnmergeConstant(MachineInstr &MI,
2028                                                  SmallVectorImpl<APInt> &Csts) {
2029   unsigned SrcIdx = MI.getNumOperands() - 1;
2030   Register SrcReg = MI.getOperand(SrcIdx).getReg();
2031   MachineInstr *SrcInstr = MRI.getVRegDef(SrcReg);
2032   if (SrcInstr->getOpcode() != TargetOpcode::G_CONSTANT &&
2033       SrcInstr->getOpcode() != TargetOpcode::G_FCONSTANT)
2034     return false;
  // Break down the big constant into smaller ones.
2036   const MachineOperand &CstVal = SrcInstr->getOperand(1);
2037   APInt Val = SrcInstr->getOpcode() == TargetOpcode::G_CONSTANT
2038                   ? CstVal.getCImm()->getValue()
2039                   : CstVal.getFPImm()->getValueAPF().bitcastToAPInt();
2040 
2041   LLT Dst0Ty = MRI.getType(MI.getOperand(0).getReg());
2042   unsigned ShiftAmt = Dst0Ty.getSizeInBits();
2043   // Unmerge a constant.
2044   for (unsigned Idx = 0; Idx != SrcIdx; ++Idx) {
2045     Csts.emplace_back(Val.trunc(ShiftAmt));
2046     Val = Val.lshr(ShiftAmt);
2047   }
2048 
2049   return true;
2050 }
2051 
2052 bool CombinerHelper::applyCombineUnmergeConstant(MachineInstr &MI,
2053                                                  SmallVectorImpl<APInt> &Csts) {
2054   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2055          "Expected an unmerge");
2056   assert((MI.getNumOperands() - 1 == Csts.size()) &&
2057          "Not enough operands to replace all defs");
2058   unsigned NumElems = MI.getNumOperands() - 1;
2059   Builder.setInstrAndDebugLoc(MI);
2060   for (unsigned Idx = 0; Idx < NumElems; ++Idx) {
2061     Register DstReg = MI.getOperand(Idx).getReg();
2062     Builder.buildConstant(DstReg, Csts[Idx]);
2063   }
2064 
2065   MI.eraseFromParent();
2066   return true;
2067 }
2068 
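// If all definitions of a G_UNMERGE_VALUES except the first one are dead,
// replace the unmerge with a truncate of its source.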
2069 bool CombinerHelper::matchCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2070   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2071          "Expected an unmerge");
2072   // Check that all the lanes are dead except the first one.
2073   for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2074     if (!MRI.use_nodbg_empty(MI.getOperand(Idx).getReg()))
2075       return false;
2076   }
2077   return true;
2078 }
2079 
2080 bool CombinerHelper::applyCombineUnmergeWithDeadLanesToTrunc(MachineInstr &MI) {
2081   Builder.setInstrAndDebugLoc(MI);
2082   Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
  // Truncating a vector is going to truncate every single lane,
  // whereas we want the full low bits.
  // Do the operation on a scalar instead.
2086   LLT SrcTy = MRI.getType(SrcReg);
2087   if (SrcTy.isVector())
2088     SrcReg =
2089         Builder.buildCast(LLT::scalar(SrcTy.getSizeInBits()), SrcReg).getReg(0);
2090 
2091   Register Dst0Reg = MI.getOperand(0).getReg();
2092   LLT Dst0Ty = MRI.getType(Dst0Reg);
2093   if (Dst0Ty.isVector()) {
2094     auto MIB = Builder.buildTrunc(LLT::scalar(Dst0Ty.getSizeInBits()), SrcReg);
2095     Builder.buildCast(Dst0Reg, MIB);
2096   } else
2097     Builder.buildTrunc(Dst0Reg, SrcReg);
2098   MI.eraseFromParent();
2099   return true;
2100 }
2101 
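// Transform an unmerge of a scalar G_ZEXT so that the first definition becomes
// a zext (or a plain replacement) of the zext source and the remaining
// definitions become zero constants.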
2102 bool CombinerHelper::matchCombineUnmergeZExtToZExt(MachineInstr &MI) {
2103   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2104          "Expected an unmerge");
2105   Register Dst0Reg = MI.getOperand(0).getReg();
2106   LLT Dst0Ty = MRI.getType(Dst0Reg);
  // G_ZEXT on a vector applies to each lane, so it will
  // affect all destinations. Therefore we won't be able
  // to simplify the unmerge to just the first definition.
2110   if (Dst0Ty.isVector())
2111     return false;
2112   Register SrcReg = MI.getOperand(MI.getNumDefs()).getReg();
2113   LLT SrcTy = MRI.getType(SrcReg);
2114   if (SrcTy.isVector())
2115     return false;
2116 
2117   Register ZExtSrcReg;
2118   if (!mi_match(SrcReg, MRI, m_GZExt(m_Reg(ZExtSrcReg))))
2119     return false;
2120 
  // Finally we can replace the first definition with a zext of the source if
  // the definition is big enough to hold all of ZExtSrc's bits.
2124   LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2125   return ZExtSrcTy.getSizeInBits() <= Dst0Ty.getSizeInBits();
2126 }
2127 
2128 bool CombinerHelper::applyCombineUnmergeZExtToZExt(MachineInstr &MI) {
2129   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES &&
2130          "Expected an unmerge");
2131 
2132   Register Dst0Reg = MI.getOperand(0).getReg();
2133 
2134   MachineInstr *ZExtInstr =
2135       MRI.getVRegDef(MI.getOperand(MI.getNumDefs()).getReg());
2136   assert(ZExtInstr && ZExtInstr->getOpcode() == TargetOpcode::G_ZEXT &&
2137          "Expecting a G_ZEXT");
2138 
2139   Register ZExtSrcReg = ZExtInstr->getOperand(1).getReg();
2140   LLT Dst0Ty = MRI.getType(Dst0Reg);
2141   LLT ZExtSrcTy = MRI.getType(ZExtSrcReg);
2142 
2143   Builder.setInstrAndDebugLoc(MI);
2144 
2145   if (Dst0Ty.getSizeInBits() > ZExtSrcTy.getSizeInBits()) {
2146     Builder.buildZExt(Dst0Reg, ZExtSrcReg);
2147   } else {
2148     assert(Dst0Ty.getSizeInBits() == ZExtSrcTy.getSizeInBits() &&
2149            "ZExt src doesn't fit in destination");
2150     replaceRegWith(MRI, Dst0Reg, ZExtSrcReg);
2151   }
2152 
2153   Register ZeroReg;
2154   for (unsigned Idx = 1, EndIdx = MI.getNumDefs(); Idx != EndIdx; ++Idx) {
2155     if (!ZeroReg)
2156       ZeroReg = Builder.buildConstant(Dst0Ty, 0).getReg(0);
2157     replaceRegWith(MRI, MI.getOperand(Idx).getReg(), ZeroReg);
2158   }
2159   MI.eraseFromParent();
2160   return true;
2161 }
2162 
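// Match a scalar shift by a constant amount of at least half (but less than
// all) of the bitwidth, so that the shift can be rewritten in terms of a
// G_UNMERGE_VALUES of the source (see applyCombineShiftToUnmerge below).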
2163 bool CombinerHelper::matchCombineShiftToUnmerge(MachineInstr &MI,
2164                                                 unsigned TargetShiftSize,
2165                                                 unsigned &ShiftVal) {
2166   assert((MI.getOpcode() == TargetOpcode::G_SHL ||
2167           MI.getOpcode() == TargetOpcode::G_LSHR ||
2168           MI.getOpcode() == TargetOpcode::G_ASHR) && "Expected a shift");
2169 
2170   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
  if (Ty.isVector()) // TODO: Handle vector types.
2172     return false;
2173 
2174   // Don't narrow further than the requested size.
2175   unsigned Size = Ty.getSizeInBits();
2176   if (Size <= TargetShiftSize)
2177     return false;
2178 
2179   auto MaybeImmVal =
2180     getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
2181   if (!MaybeImmVal)
2182     return false;
2183 
2184   ShiftVal = MaybeImmVal->Value.getSExtValue();
2185   return ShiftVal >= Size / 2 && ShiftVal < Size;
2186 }
2187 
2188 bool CombinerHelper::applyCombineShiftToUnmerge(MachineInstr &MI,
2189                                                 const unsigned &ShiftVal) {
2190   Register DstReg = MI.getOperand(0).getReg();
2191   Register SrcReg = MI.getOperand(1).getReg();
2192   LLT Ty = MRI.getType(SrcReg);
2193   unsigned Size = Ty.getSizeInBits();
2194   unsigned HalfSize = Size / 2;
2195   assert(ShiftVal >= HalfSize);
2196 
2197   LLT HalfTy = LLT::scalar(HalfSize);
2198 
2199   Builder.setInstr(MI);
2200   auto Unmerge = Builder.buildUnmerge(HalfTy, SrcReg);
2201   unsigned NarrowShiftAmt = ShiftVal - HalfSize;
2202 
2203   if (MI.getOpcode() == TargetOpcode::G_LSHR) {
2204     Register Narrowed = Unmerge.getReg(1);
2205 
2206     //  dst = G_LSHR s64:x, C for C >= 32
2207     // =>
2208     //   lo, hi = G_UNMERGE_VALUES x
2209     //   dst = G_MERGE_VALUES (G_LSHR hi, C - 32), 0
2210 
2211     if (NarrowShiftAmt != 0) {
2212       Narrowed = Builder.buildLShr(HalfTy, Narrowed,
2213         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2214     }
2215 
2216     auto Zero = Builder.buildConstant(HalfTy, 0);
2217     Builder.buildMerge(DstReg, { Narrowed, Zero });
2218   } else if (MI.getOpcode() == TargetOpcode::G_SHL) {
2219     Register Narrowed = Unmerge.getReg(0);
2220     //  dst = G_SHL s64:x, C for C >= 32
2221     // =>
2222     //   lo, hi = G_UNMERGE_VALUES x
2223     //   dst = G_MERGE_VALUES 0, (G_SHL hi, C - 32)
2224     if (NarrowShiftAmt != 0) {
2225       Narrowed = Builder.buildShl(HalfTy, Narrowed,
2226         Builder.buildConstant(HalfTy, NarrowShiftAmt)).getReg(0);
2227     }
2228 
2229     auto Zero = Builder.buildConstant(HalfTy, 0);
2230     Builder.buildMerge(DstReg, { Zero, Narrowed });
2231   } else {
2232     assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2233     auto Hi = Builder.buildAShr(
2234       HalfTy, Unmerge.getReg(1),
2235       Builder.buildConstant(HalfTy, HalfSize - 1));
2236 
2237     if (ShiftVal == HalfSize) {
2238       // (G_ASHR i64:x, 32) ->
2239       //   G_MERGE_VALUES hi_32(x), (G_ASHR hi_32(x), 31)
2240       Builder.buildMerge(DstReg, { Unmerge.getReg(1), Hi });
2241     } else if (ShiftVal == Size - 1) {
2242       // Don't need a second shift.
2243       // (G_ASHR i64:x, 63) ->
2244       //   %narrowed = (G_ASHR hi_32(x), 31)
2245       //   G_MERGE_VALUES %narrowed, %narrowed
2246       Builder.buildMerge(DstReg, { Hi, Hi });
2247     } else {
2248       auto Lo = Builder.buildAShr(
2249         HalfTy, Unmerge.getReg(1),
2250         Builder.buildConstant(HalfTy, ShiftVal - HalfSize));
2251 
      // (G_ASHR i64:x, C) -> for C >= 32
2253       //   G_MERGE_VALUES (G_ASHR hi_32(x), C - 32), (G_ASHR hi_32(x), 31)
2254       Builder.buildMerge(DstReg, { Lo, Hi });
2255     }
2256   }
2257 
2258   MI.eraseFromParent();
2259   return true;
2260 }
2261 
2262 bool CombinerHelper::tryCombineShiftToUnmerge(MachineInstr &MI,
2263                                               unsigned TargetShiftAmount) {
2264   unsigned ShiftAmt;
2265   if (matchCombineShiftToUnmerge(MI, TargetShiftAmount, ShiftAmt)) {
2266     applyCombineShiftToUnmerge(MI, ShiftAmt);
2267     return true;
2268   }
2269 
2270   return false;
2271 }
2272 
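// Fold away an integer round trip on a pointer value:
//   %i = G_PTRTOINT %ptr
//   %dst = G_INTTOPTR %i
// -->
//   %dst = COPY %ptr
// This only applies when %ptr already has the destination pointer type.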
2273 bool CombinerHelper::matchCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2274   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2275   Register DstReg = MI.getOperand(0).getReg();
2276   LLT DstTy = MRI.getType(DstReg);
2277   Register SrcReg = MI.getOperand(1).getReg();
2278   return mi_match(SrcReg, MRI,
2279                   m_GPtrToInt(m_all_of(m_SpecificType(DstTy), m_Reg(Reg))));
2280 }
2281 
2282 bool CombinerHelper::applyCombineI2PToP2I(MachineInstr &MI, Register &Reg) {
2283   assert(MI.getOpcode() == TargetOpcode::G_INTTOPTR && "Expected a G_INTTOPTR");
2284   Register DstReg = MI.getOperand(0).getReg();
2285   Builder.setInstr(MI);
2286   Builder.buildCopy(DstReg, Reg);
2287   MI.eraseFromParent();
2288   return true;
2289 }
2290 
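// Fold a pointer round trip on an integer value:
//   %p = G_INTTOPTR %int
//   %dst = G_PTRTOINT %p
// -->
//   %dst = G_ZEXT/G_TRUNC/COPY %int (depending on the relative type sizes)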
2291 bool CombinerHelper::matchCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2292   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2293   Register SrcReg = MI.getOperand(1).getReg();
2294   return mi_match(SrcReg, MRI, m_GIntToPtr(m_Reg(Reg)));
2295 }
2296 
2297 bool CombinerHelper::applyCombineP2IToI2P(MachineInstr &MI, Register &Reg) {
2298   assert(MI.getOpcode() == TargetOpcode::G_PTRTOINT && "Expected a G_PTRTOINT");
2299   Register DstReg = MI.getOperand(0).getReg();
2300   Builder.setInstr(MI);
2301   Builder.buildZExtOrTrunc(DstReg, Reg);
2302   MI.eraseFromParent();
2303   return true;
2304 }
2305 
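// Turn an integer add of a ptrtoint'd pointer into pointer arithmetic:
//   %i = G_PTRTOINT %ptr
//   %dst = G_ADD %i, %off
// -->
//   %dst = G_PTRTOINT (G_PTR_ADD %ptr, %off)
// The match records whether the operands need to be commuted first.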
2306 bool CombinerHelper::matchCombineAddP2IToPtrAdd(
2307     MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2308   assert(MI.getOpcode() == TargetOpcode::G_ADD);
2309   Register LHS = MI.getOperand(1).getReg();
2310   Register RHS = MI.getOperand(2).getReg();
2311   LLT IntTy = MRI.getType(LHS);
2312 
2313   // G_PTR_ADD always has the pointer in the LHS, so we may need to commute the
2314   // instruction.
2315   PtrReg.second = false;
2316   for (Register SrcReg : {LHS, RHS}) {
2317     if (mi_match(SrcReg, MRI, m_GPtrToInt(m_Reg(PtrReg.first)))) {
2318       // Don't handle cases where the integer is implicitly converted to the
2319       // pointer width.
2320       LLT PtrTy = MRI.getType(PtrReg.first);
2321       if (PtrTy.getScalarSizeInBits() == IntTy.getScalarSizeInBits())
2322         return true;
2323     }
2324 
2325     PtrReg.second = true;
2326   }
2327 
2328   return false;
2329 }
2330 
2331 bool CombinerHelper::applyCombineAddP2IToPtrAdd(
2332     MachineInstr &MI, std::pair<Register, bool> &PtrReg) {
2333   Register Dst = MI.getOperand(0).getReg();
2334   Register LHS = MI.getOperand(1).getReg();
2335   Register RHS = MI.getOperand(2).getReg();
2336 
2337   const bool DoCommute = PtrReg.second;
2338   if (DoCommute)
2339     std::swap(LHS, RHS);
2340   LHS = PtrReg.first;
2341 
2342   LLT PtrTy = MRI.getType(LHS);
2343 
2344   Builder.setInstrAndDebugLoc(MI);
2345   auto PtrAdd = Builder.buildPtrAdd(PtrTy, LHS, RHS);
2346   Builder.buildPtrToInt(Dst, PtrAdd);
2347   MI.eraseFromParent();
2348   return true;
2349 }
2350 
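// Fold a G_PTR_ADD whose base is a G_INTTOPTR of a constant and whose offset
// is also a constant into a single constant materialization of (C1 + C2).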
2351 bool CombinerHelper::matchCombineConstPtrAddToI2P(MachineInstr &MI,
2352                                                   int64_t &NewCst) {
2353   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
2354   Register LHS = MI.getOperand(1).getReg();
2355   Register RHS = MI.getOperand(2).getReg();
2356   MachineRegisterInfo &MRI = Builder.getMF().getRegInfo();
2357 
2358   if (auto RHSCst = getConstantVRegSExtVal(RHS, MRI)) {
2359     int64_t Cst;
2360     if (mi_match(LHS, MRI, m_GIntToPtr(m_ICst(Cst)))) {
2361       NewCst = Cst + *RHSCst;
2362       return true;
2363     }
2364   }
2365 
2366   return false;
2367 }
2368 
2369 bool CombinerHelper::applyCombineConstPtrAddToI2P(MachineInstr &MI,
2370                                                   int64_t &NewCst) {
2371   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD && "Expected a G_PTR_ADD");
2372   Register Dst = MI.getOperand(0).getReg();
2373 
2374   Builder.setInstrAndDebugLoc(MI);
2375   Builder.buildConstant(Dst, NewCst);
2376   MI.eraseFromParent();
2377   return true;
2378 }
2379 
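// Match anyext(trunc x) -> x, which is valid whenever x already has the
// destination type.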
2380 bool CombinerHelper::matchCombineAnyExtTrunc(MachineInstr &MI, Register &Reg) {
2381   assert(MI.getOpcode() == TargetOpcode::G_ANYEXT && "Expected a G_ANYEXT");
2382   Register DstReg = MI.getOperand(0).getReg();
2383   Register SrcReg = MI.getOperand(1).getReg();
2384   LLT DstTy = MRI.getType(DstReg);
2385   return mi_match(SrcReg, MRI,
2386                   m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))));
2387 }
2388 
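// Match zext(trunc x) -> x, which is valid when x already has the destination
// type and its truncated-away high bits are known to be zero.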
2389 bool CombinerHelper::matchCombineZextTrunc(MachineInstr &MI, Register &Reg) {
2390   assert(MI.getOpcode() == TargetOpcode::G_ZEXT && "Expected a G_ZEXT");
2391   Register DstReg = MI.getOperand(0).getReg();
2392   Register SrcReg = MI.getOperand(1).getReg();
2393   LLT DstTy = MRI.getType(DstReg);
2394   if (mi_match(SrcReg, MRI,
2395                m_GTrunc(m_all_of(m_Reg(Reg), m_SpecificType(DstTy))))) {
2396     unsigned DstSize = DstTy.getScalarSizeInBits();
2397     unsigned SrcSize = MRI.getType(SrcReg).getScalarSizeInBits();
2398     return KB->getKnownBits(Reg).countMinLeadingZeros() >= DstSize - SrcSize;
2399   }
2400   return false;
2401 }
2402 
2403 bool CombinerHelper::matchCombineExtOfExt(
2404     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2405   assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2406           MI.getOpcode() == TargetOpcode::G_SEXT ||
2407           MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2408          "Expected a G_[ASZ]EXT");
2409   Register SrcReg = MI.getOperand(1).getReg();
2410   MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2411   // Match exts with the same opcode, anyext([sz]ext) and sext(zext).
2412   unsigned Opc = MI.getOpcode();
2413   unsigned SrcOpc = SrcMI->getOpcode();
2414   if (Opc == SrcOpc ||
2415       (Opc == TargetOpcode::G_ANYEXT &&
2416        (SrcOpc == TargetOpcode::G_SEXT || SrcOpc == TargetOpcode::G_ZEXT)) ||
2417       (Opc == TargetOpcode::G_SEXT && SrcOpc == TargetOpcode::G_ZEXT)) {
2418     MatchInfo = std::make_tuple(SrcMI->getOperand(1).getReg(), SrcOpc);
2419     return true;
2420   }
2421   return false;
2422 }
2423 
2424 bool CombinerHelper::applyCombineExtOfExt(
2425     MachineInstr &MI, std::tuple<Register, unsigned> &MatchInfo) {
2426   assert((MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2427           MI.getOpcode() == TargetOpcode::G_SEXT ||
2428           MI.getOpcode() == TargetOpcode::G_ZEXT) &&
2429          "Expected a G_[ASZ]EXT");
2430 
2431   Register Reg = std::get<0>(MatchInfo);
2432   unsigned SrcExtOp = std::get<1>(MatchInfo);
2433 
2434   // Combine exts with the same opcode.
2435   if (MI.getOpcode() == SrcExtOp) {
2436     Observer.changingInstr(MI);
2437     MI.getOperand(1).setReg(Reg);
2438     Observer.changedInstr(MI);
2439     return true;
2440   }
2441 
2442   // Combine:
2443   // - anyext([sz]ext x) to [sz]ext x
2444   // - sext(zext x) to zext x
2445   if (MI.getOpcode() == TargetOpcode::G_ANYEXT ||
2446       (MI.getOpcode() == TargetOpcode::G_SEXT &&
2447        SrcExtOp == TargetOpcode::G_ZEXT)) {
2448     Register DstReg = MI.getOperand(0).getReg();
2449     Builder.setInstrAndDebugLoc(MI);
2450     Builder.buildInstr(SrcExtOp, {DstReg}, {Reg});
2451     MI.eraseFromParent();
2452     return true;
2453   }
2454 
2455   return false;
2456 }
2457 
2458 bool CombinerHelper::applyCombineMulByNegativeOne(MachineInstr &MI) {
2459   assert(MI.getOpcode() == TargetOpcode::G_MUL && "Expected a G_MUL");
2460   Register DstReg = MI.getOperand(0).getReg();
2461   Register SrcReg = MI.getOperand(1).getReg();
2462   LLT DstTy = MRI.getType(DstReg);
2463 
2464   Builder.setInstrAndDebugLoc(MI);
2465   Builder.buildSub(DstReg, Builder.buildConstant(DstTy, 0), SrcReg,
2466                    MI.getFlags());
2467   MI.eraseFromParent();
2468   return true;
2469 }
2470 
2471 bool CombinerHelper::matchCombineFNegOfFNeg(MachineInstr &MI, Register &Reg) {
2472   assert(MI.getOpcode() == TargetOpcode::G_FNEG && "Expected a G_FNEG");
2473   Register SrcReg = MI.getOperand(1).getReg();
2474   return mi_match(SrcReg, MRI, m_GFNeg(m_Reg(Reg)));
2475 }
2476 
2477 bool CombinerHelper::matchCombineFAbsOfFAbs(MachineInstr &MI, Register &Src) {
2478   assert(MI.getOpcode() == TargetOpcode::G_FABS && "Expected a G_FABS");
2479   Src = MI.getOperand(1).getReg();
2480   Register AbsSrc;
2481   return mi_match(Src, MRI, m_GFabs(m_Reg(AbsSrc)));
2482 }
2483 
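// Match trunc([asz]ext x); depending on the relative sizes the apply emits x
// itself, a narrower extension of x, or a truncate of x.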
2484 bool CombinerHelper::matchCombineTruncOfExt(
2485     MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2486   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2487   Register SrcReg = MI.getOperand(1).getReg();
2488   MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2489   unsigned SrcOpc = SrcMI->getOpcode();
2490   if (SrcOpc == TargetOpcode::G_ANYEXT || SrcOpc == TargetOpcode::G_SEXT ||
2491       SrcOpc == TargetOpcode::G_ZEXT) {
2492     MatchInfo = std::make_pair(SrcMI->getOperand(1).getReg(), SrcOpc);
2493     return true;
2494   }
2495   return false;
2496 }
2497 
2498 bool CombinerHelper::applyCombineTruncOfExt(
2499     MachineInstr &MI, std::pair<Register, unsigned> &MatchInfo) {
2500   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2501   Register SrcReg = MatchInfo.first;
2502   unsigned SrcExtOp = MatchInfo.second;
2503   Register DstReg = MI.getOperand(0).getReg();
2504   LLT SrcTy = MRI.getType(SrcReg);
2505   LLT DstTy = MRI.getType(DstReg);
2506   if (SrcTy == DstTy) {
2507     MI.eraseFromParent();
2508     replaceRegWith(MRI, DstReg, SrcReg);
2509     return true;
2510   }
2511   Builder.setInstrAndDebugLoc(MI);
2512   if (SrcTy.getSizeInBits() < DstTy.getSizeInBits())
2513     Builder.buildInstr(SrcExtOp, {DstReg}, {SrcReg});
2514   else
2515     Builder.buildTrunc(DstReg, SrcReg);
2516   MI.eraseFromParent();
2517   return true;
2518 }
2519 
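// Match trunc (shl x, K) -> shl (trunc x), K when K is known to be less than
// the narrow bitwidth, so both forms produce the same low bits.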
2520 bool CombinerHelper::matchCombineTruncOfShl(
2521     MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2522   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2523   Register DstReg = MI.getOperand(0).getReg();
2524   Register SrcReg = MI.getOperand(1).getReg();
2525   LLT DstTy = MRI.getType(DstReg);
2526   Register ShiftSrc;
2527   Register ShiftAmt;
2528 
2529   if (MRI.hasOneNonDBGUse(SrcReg) &&
2530       mi_match(SrcReg, MRI, m_GShl(m_Reg(ShiftSrc), m_Reg(ShiftAmt))) &&
2531       isLegalOrBeforeLegalizer(
2532           {TargetOpcode::G_SHL,
2533            {DstTy, getTargetLowering().getPreferredShiftAmountTy(DstTy)}})) {
2534     KnownBits Known = KB->getKnownBits(ShiftAmt);
2535     unsigned Size = DstTy.getSizeInBits();
2536     if (Known.getBitWidth() - Known.countMinLeadingZeros() <= Log2_32(Size)) {
2537       MatchInfo = std::make_pair(ShiftSrc, ShiftAmt);
2538       return true;
2539     }
2540   }
2541   return false;
2542 }
2543 
2544 bool CombinerHelper::applyCombineTruncOfShl(
2545     MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
2546   assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Expected a G_TRUNC");
2547   Register DstReg = MI.getOperand(0).getReg();
2548   Register SrcReg = MI.getOperand(1).getReg();
2549   LLT DstTy = MRI.getType(DstReg);
2550   MachineInstr *SrcMI = MRI.getVRegDef(SrcReg);
2551 
2552   Register ShiftSrc = MatchInfo.first;
2553   Register ShiftAmt = MatchInfo.second;
2554   Builder.setInstrAndDebugLoc(MI);
2555   auto TruncShiftSrc = Builder.buildTrunc(DstTy, ShiftSrc);
2556   Builder.buildShl(DstReg, TruncShiftSrc, ShiftAmt, SrcMI->getFlags());
2557   MI.eraseFromParent();
2558   return true;
2559 }
2560 
2561 bool CombinerHelper::matchAnyExplicitUseIsUndef(MachineInstr &MI) {
2562   return any_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2563     return MO.isReg() &&
2564            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2565   });
2566 }
2567 
2568 bool CombinerHelper::matchAllExplicitUsesAreUndef(MachineInstr &MI) {
2569   return all_of(MI.explicit_uses(), [this](const MachineOperand &MO) {
2570     return !MO.isReg() ||
2571            getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2572   });
2573 }
2574 
2575 bool CombinerHelper::matchUndefShuffleVectorMask(MachineInstr &MI) {
2576   assert(MI.getOpcode() == TargetOpcode::G_SHUFFLE_VECTOR);
2577   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
2578   return all_of(Mask, [](int Elt) { return Elt < 0; });
2579 }
2580 
2581 bool CombinerHelper::matchUndefStore(MachineInstr &MI) {
2582   assert(MI.getOpcode() == TargetOpcode::G_STORE);
2583   return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(0).getReg(),
2584                       MRI);
2585 }
2586 
2587 bool CombinerHelper::matchUndefSelectCmp(MachineInstr &MI) {
2588   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2589   return getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MI.getOperand(1).getReg(),
2590                       MRI);
2591 }
2592 
2593 bool CombinerHelper::matchConstantSelectCmp(MachineInstr &MI, unsigned &OpIdx) {
2594   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2595   if (auto MaybeCstCmp =
2596           getConstantVRegValWithLookThrough(MI.getOperand(1).getReg(), MRI)) {
2597     OpIdx = MaybeCstCmp->Value.isNullValue() ? 3 : 2;
2598     return true;
2599   }
2600   return false;
2601 }
2602 
2603 bool CombinerHelper::eraseInst(MachineInstr &MI) {
2604   MI.eraseFromParent();
2605   return true;
2606 }
2607 
2608 bool CombinerHelper::matchEqualDefs(const MachineOperand &MOP1,
2609                                     const MachineOperand &MOP2) {
2610   if (!MOP1.isReg() || !MOP2.isReg())
2611     return false;
2612   MachineInstr *I1 = getDefIgnoringCopies(MOP1.getReg(), MRI);
2613   if (!I1)
2614     return false;
2615   MachineInstr *I2 = getDefIgnoringCopies(MOP2.getReg(), MRI);
2616   if (!I2)
2617     return false;
2618 
2619   // Handle a case like this:
2620   //
2621   // %0:_(s64), %1:_(s64) = G_UNMERGE_VALUES %2:_(<2 x s64>)
2622   //
2623   // Even though %0 and %1 are produced by the same instruction they are not
2624   // the same values.
2625   if (I1 == I2)
2626     return MOP1.getReg() == MOP2.getReg();
2627 
2628   // If we have an instruction which loads or stores, we can't guarantee that
2629   // it is identical.
2630   //
2631   // For example, we may have
2632   //
2633   // %x1 = G_LOAD %addr (load N from @somewhere)
2634   // ...
2635   // call @foo
2636   // ...
2637   // %x2 = G_LOAD %addr (load N from @somewhere)
2638   // ...
2639   // %or = G_OR %x1, %x2
2640   //
2641   // It's possible that @foo will modify whatever lives at the address we're
2642   // loading from. To be safe, let's just assume that all loads and stores
  // are different (unless we have something which is guaranteed not to
  // change).
2645   if (I1->mayLoadOrStore() && !I1->isDereferenceableInvariantLoad(nullptr))
2646     return false;
2647 
2648   // Check for physical registers on the instructions first to avoid cases
2649   // like this:
2650   //
2651   // %a = COPY $physreg
2652   // ...
2653   // SOMETHING implicit-def $physreg
2654   // ...
2655   // %b = COPY $physreg
2656   //
2657   // These copies are not equivalent.
2658   if (any_of(I1->uses(), [](const MachineOperand &MO) {
2659         return MO.isReg() && MO.getReg().isPhysical();
2660       })) {
2661     // Check if we have a case like this:
2662     //
2663     // %a = COPY $physreg
2664     // %b = COPY %a
2665     //
2666     // In this case, I1 and I2 will both be equal to %a = COPY $physreg.
2667     // From that, we know that they must have the same value, since they must
2668     // have come from the same COPY.
2669     return I1->isIdenticalTo(*I2);
2670   }
2671 
2672   // We don't have any physical registers, so we don't necessarily need the
2673   // same vreg defs.
2674   //
2675   // On the off-chance that there's some target instruction feeding into the
2676   // instruction, let's use produceSameValue instead of isIdenticalTo.
2677   return Builder.getTII().produceSameValue(*I1, *I2, &MRI);
2678 }
2679 
2680 bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
2681   if (!MOP.isReg())
2682     return false;
2683   // MIPatternMatch doesn't let us look through G_ZEXT etc.
2684   auto ValAndVReg = getConstantVRegValWithLookThrough(MOP.getReg(), MRI);
2685   return ValAndVReg && ValAndVReg->Value == C;
2686 }
2687 
2688 bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
2689                                                      unsigned OpIdx) {
2690   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2691   Register OldReg = MI.getOperand(0).getReg();
2692   Register Replacement = MI.getOperand(OpIdx).getReg();
2693   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2694   MI.eraseFromParent();
2695   replaceRegWith(MRI, OldReg, Replacement);
2696   return true;
2697 }
2698 
2699 bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
2700                                                  Register Replacement) {
2701   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
2702   Register OldReg = MI.getOperand(0).getReg();
2703   assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
2704   MI.eraseFromParent();
2705   replaceRegWith(MRI, OldReg, Replacement);
2706   return true;
2707 }
2708 
2709 bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
2710   assert(MI.getOpcode() == TargetOpcode::G_SELECT);
2711   // Match (cond ? x : x)
2712   return matchEqualDefs(MI.getOperand(2), MI.getOperand(3)) &&
2713          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(2).getReg(),
2714                        MRI);
2715 }
2716 
2717 bool CombinerHelper::matchBinOpSameVal(MachineInstr &MI) {
2718   return matchEqualDefs(MI.getOperand(1), MI.getOperand(2)) &&
2719          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(1).getReg(),
2720                        MRI);
2721 }
2722 
2723 bool CombinerHelper::matchOperandIsZero(MachineInstr &MI, unsigned OpIdx) {
2724   return matchConstantOp(MI.getOperand(OpIdx), 0) &&
2725          canReplaceReg(MI.getOperand(0).getReg(), MI.getOperand(OpIdx).getReg(),
2726                        MRI);
2727 }
2728 
2729 bool CombinerHelper::matchOperandIsUndef(MachineInstr &MI, unsigned OpIdx) {
2730   MachineOperand &MO = MI.getOperand(OpIdx);
2731   return MO.isReg() &&
2732          getOpcodeDef(TargetOpcode::G_IMPLICIT_DEF, MO.getReg(), MRI);
2733 }
2734 
2735 bool CombinerHelper::matchOperandIsKnownToBeAPowerOfTwo(MachineInstr &MI,
2736                                                         unsigned OpIdx) {
2737   MachineOperand &MO = MI.getOperand(OpIdx);
2738   return isKnownToBeAPowerOfTwo(MO.getReg(), MRI, KB);
2739 }
2740 
2741 bool CombinerHelper::replaceInstWithFConstant(MachineInstr &MI, double C) {
2742   assert(MI.getNumDefs() == 1 && "Expected only one def?");
2743   Builder.setInstr(MI);
2744   Builder.buildFConstant(MI.getOperand(0), C);
2745   MI.eraseFromParent();
2746   return true;
2747 }
2748 
2749 bool CombinerHelper::replaceInstWithConstant(MachineInstr &MI, int64_t C) {
2750   assert(MI.getNumDefs() == 1 && "Expected only one def?");
2751   Builder.setInstr(MI);
2752   Builder.buildConstant(MI.getOperand(0), C);
2753   MI.eraseFromParent();
2754   return true;
2755 }
2756 
2757 bool CombinerHelper::replaceInstWithUndef(MachineInstr &MI) {
2758   assert(MI.getNumDefs() == 1 && "Expected only one def?");
2759   Builder.setInstr(MI);
2760   Builder.buildUndef(MI.getOperand(0));
2761   MI.eraseFromParent();
2762   return true;
2763 }
2764 
2765 bool CombinerHelper::matchSimplifyAddToSub(
2766     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2767   Register LHS = MI.getOperand(1).getReg();
2768   Register RHS = MI.getOperand(2).getReg();
2769   Register &NewLHS = std::get<0>(MatchInfo);
2770   Register &NewRHS = std::get<1>(MatchInfo);
2771 
2772   // Helper lambda to check for opportunities for
2773   // ((0-A) + B) -> B - A
2774   // (A + (0-B)) -> A - B
2775   auto CheckFold = [&](Register &MaybeSub, Register &MaybeNewLHS) {
2776     if (!mi_match(MaybeSub, MRI, m_Neg(m_Reg(NewRHS))))
2777       return false;
2778     NewLHS = MaybeNewLHS;
2779     return true;
2780   };
2781 
2782   return CheckFold(LHS, RHS) || CheckFold(RHS, LHS);
2783 }
2784 
2785 bool CombinerHelper::matchCombineInsertVecElts(
2786     MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2787   assert(MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT &&
2788          "Invalid opcode");
2789   Register DstReg = MI.getOperand(0).getReg();
2790   LLT DstTy = MRI.getType(DstReg);
2791   assert(DstTy.isVector() && "Invalid G_INSERT_VECTOR_ELT?");
2792   unsigned NumElts = DstTy.getNumElements();
2793   // If this MI is part of a sequence of insert_vec_elts, then
2794   // don't do the combine in the middle of the sequence.
2795   if (MRI.hasOneUse(DstReg) && MRI.use_instr_begin(DstReg)->getOpcode() ==
2796                                    TargetOpcode::G_INSERT_VECTOR_ELT)
2797     return false;
2798   MachineInstr *CurrInst = &MI;
2799   MachineInstr *TmpInst;
2800   int64_t IntImm;
2801   Register TmpReg;
2802   MatchInfo.resize(NumElts);
2803   while (mi_match(
2804       CurrInst->getOperand(0).getReg(), MRI,
2805       m_GInsertVecElt(m_MInstr(TmpInst), m_Reg(TmpReg), m_ICst(IntImm)))) {
    if (IntImm >= NumElts || IntImm < 0)
2807       return false;
2808     if (!MatchInfo[IntImm])
2809       MatchInfo[IntImm] = TmpReg;
2810     CurrInst = TmpInst;
2811   }
2812   // Variable index.
2813   if (CurrInst->getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
2814     return false;
2815   if (TmpInst->getOpcode() == TargetOpcode::G_BUILD_VECTOR) {
2816     for (unsigned I = 1; I < TmpInst->getNumOperands(); ++I) {
2817       if (!MatchInfo[I - 1].isValid())
2818         MatchInfo[I - 1] = TmpInst->getOperand(I).getReg();
2819     }
2820     return true;
2821   }
2822   // If we didn't end in a G_IMPLICIT_DEF, bail out.
2823   return TmpInst->getOpcode() == TargetOpcode::G_IMPLICIT_DEF;
2824 }
2825 
2826 bool CombinerHelper::applyCombineInsertVecElts(
2827     MachineInstr &MI, SmallVectorImpl<Register> &MatchInfo) {
2828   Builder.setInstr(MI);
2829   Register UndefReg;
2830   auto GetUndef = [&]() {
2831     if (UndefReg)
2832       return UndefReg;
2833     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
2834     UndefReg = Builder.buildUndef(DstTy.getScalarType()).getReg(0);
2835     return UndefReg;
2836   };
2837   for (unsigned I = 0; I < MatchInfo.size(); ++I) {
2838     if (!MatchInfo[I])
2839       MatchInfo[I] = GetUndef();
2840   }
2841   Builder.buildBuildVector(MI.getOperand(0).getReg(), MatchInfo);
2842   MI.eraseFromParent();
2843   return true;
2844 }
2845 
2846 bool CombinerHelper::applySimplifyAddToSub(
2847     MachineInstr &MI, std::tuple<Register, Register> &MatchInfo) {
2848   Builder.setInstr(MI);
2849   Register SubLHS, SubRHS;
2850   std::tie(SubLHS, SubRHS) = MatchInfo;
2851   Builder.buildSub(MI.getOperand(0).getReg(), SubLHS, SubRHS);
2852   MI.eraseFromParent();
2853   return true;
2854 }
2855 
2856 bool CombinerHelper::matchHoistLogicOpWithSameOpcodeHands(
2857     MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
2858   // Matches: logic (hand x, ...), (hand y, ...) -> hand (logic x, y), ...
2859   //
  // Creates the new hand and logic instructions (but does not insert them).
2861   //
2862   // On success, MatchInfo is populated with the new instructions. These are
2863   // inserted in applyHoistLogicOpWithSameOpcodeHands.
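  //
  // e.g.
  //
  // %x_ext:_(s64) = G_ZEXT %x(s32)
  // %y_ext:_(s64) = G_ZEXT %y(s32)
  // %and:_(s64) = G_AND %x_ext, %y_ext
  // ==>
  // %narrow:_(s32) = G_AND %x, %y
  // %and:_(s64) = G_ZEXT %narrow(s32)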
2864   unsigned LogicOpcode = MI.getOpcode();
2865   assert(LogicOpcode == TargetOpcode::G_AND ||
2866          LogicOpcode == TargetOpcode::G_OR ||
2867          LogicOpcode == TargetOpcode::G_XOR);
2868   MachineIRBuilder MIB(MI);
2869   Register Dst = MI.getOperand(0).getReg();
2870   Register LHSReg = MI.getOperand(1).getReg();
2871   Register RHSReg = MI.getOperand(2).getReg();
2872 
2873   // Don't recompute anything.
2874   if (!MRI.hasOneNonDBGUse(LHSReg) || !MRI.hasOneNonDBGUse(RHSReg))
2875     return false;
2876 
2877   // Make sure we have (hand x, ...), (hand y, ...)
2878   MachineInstr *LeftHandInst = getDefIgnoringCopies(LHSReg, MRI);
2879   MachineInstr *RightHandInst = getDefIgnoringCopies(RHSReg, MRI);
2880   if (!LeftHandInst || !RightHandInst)
2881     return false;
2882   unsigned HandOpcode = LeftHandInst->getOpcode();
2883   if (HandOpcode != RightHandInst->getOpcode())
2884     return false;
2885   if (!LeftHandInst->getOperand(1).isReg() ||
2886       !RightHandInst->getOperand(1).isReg())
2887     return false;
2888 
2889   // Make sure the types match up, and if we're doing this post-legalization,
2890   // we end up with legal types.
2891   Register X = LeftHandInst->getOperand(1).getReg();
2892   Register Y = RightHandInst->getOperand(1).getReg();
2893   LLT XTy = MRI.getType(X);
2894   LLT YTy = MRI.getType(Y);
2895   if (XTy != YTy)
2896     return false;
2897   if (!isLegalOrBeforeLegalizer({LogicOpcode, {XTy, YTy}}))
2898     return false;
2899 
2900   // Optional extra source register.
2901   Register ExtraHandOpSrcReg;
2902   switch (HandOpcode) {
2903   default:
2904     return false;
2905   case TargetOpcode::G_ANYEXT:
2906   case TargetOpcode::G_SEXT:
2907   case TargetOpcode::G_ZEXT: {
2908     // Match: logic (ext X), (ext Y) --> ext (logic X, Y)
2909     break;
2910   }
2911   case TargetOpcode::G_AND:
2912   case TargetOpcode::G_ASHR:
2913   case TargetOpcode::G_LSHR:
2914   case TargetOpcode::G_SHL: {
2915     // Match: logic (binop x, z), (binop y, z) -> binop (logic x, y), z
2916     MachineOperand &ZOp = LeftHandInst->getOperand(2);
2917     if (!matchEqualDefs(ZOp, RightHandInst->getOperand(2)))
2918       return false;
2919     ExtraHandOpSrcReg = ZOp.getReg();
2920     break;
2921   }
2922   }
2923 
2924   // Record the steps to build the new instructions.
2925   //
2926   // Steps to build (logic x, y)
2927   auto NewLogicDst = MRI.createGenericVirtualRegister(XTy);
2928   OperandBuildSteps LogicBuildSteps = {
2929       [=](MachineInstrBuilder &MIB) { MIB.addDef(NewLogicDst); },
2930       [=](MachineInstrBuilder &MIB) { MIB.addReg(X); },
2931       [=](MachineInstrBuilder &MIB) { MIB.addReg(Y); }};
2932   InstructionBuildSteps LogicSteps(LogicOpcode, LogicBuildSteps);
2933 
2934   // Steps to build hand (logic x, y), ...z
2935   OperandBuildSteps HandBuildSteps = {
2936       [=](MachineInstrBuilder &MIB) { MIB.addDef(Dst); },
2937       [=](MachineInstrBuilder &MIB) { MIB.addReg(NewLogicDst); }};
2938   if (ExtraHandOpSrcReg.isValid())
2939     HandBuildSteps.push_back(
2940         [=](MachineInstrBuilder &MIB) { MIB.addReg(ExtraHandOpSrcReg); });
2941   InstructionBuildSteps HandSteps(HandOpcode, HandBuildSteps);
2942 
2943   MatchInfo = InstructionStepsMatchInfo({LogicSteps, HandSteps});
2944   return true;
2945 }
2946 
2947 bool CombinerHelper::applyBuildInstructionSteps(
2948     MachineInstr &MI, InstructionStepsMatchInfo &MatchInfo) {
2949   assert(MatchInfo.InstrsToBuild.size() &&
2950          "Expected at least one instr to build?");
2951   Builder.setInstr(MI);
2952   for (auto &InstrToBuild : MatchInfo.InstrsToBuild) {
2953     assert(InstrToBuild.Opcode && "Expected a valid opcode?");
2954     assert(InstrToBuild.OperandFns.size() && "Expected at least one operand?");
2955     MachineInstrBuilder Instr = Builder.buildInstr(InstrToBuild.Opcode);
2956     for (auto &OperandFn : InstrToBuild.OperandFns)
2957       OperandFn(Instr);
2958   }
2959   MI.eraseFromParent();
2960   return true;
2961 }
2962 
2963 bool CombinerHelper::matchAshrShlToSextInreg(
2964     MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
2965   assert(MI.getOpcode() == TargetOpcode::G_ASHR);
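  // Fold (ashr (shl x, C), C) -> (sext_inreg x, Width - C), e.g. for s32:
  //
  // %shl:_(s32) = G_SHL %x, 24
  // %ashr:_(s32) = G_ASHR %shl, 24
  // ==>
  // %ashr:_(s32) = G_SEXT_INREG %x, 8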
2966   int64_t ShlCst, AshrCst;
2967   Register Src;
2968   // FIXME: detect splat constant vectors.
2969   if (!mi_match(MI.getOperand(0).getReg(), MRI,
2970                 m_GAShr(m_GShl(m_Reg(Src), m_ICst(ShlCst)), m_ICst(AshrCst))))
2971     return false;
2972   if (ShlCst != AshrCst)
2973     return false;
2974   if (!isLegalOrBeforeLegalizer(
2975           {TargetOpcode::G_SEXT_INREG, {MRI.getType(Src)}}))
2976     return false;
2977   MatchInfo = std::make_tuple(Src, ShlCst);
2978   return true;
2979 }
2980 bool CombinerHelper::applyAshShlToSextInreg(
2981     MachineInstr &MI, std::tuple<Register, int64_t> &MatchInfo) {
2982   assert(MI.getOpcode() == TargetOpcode::G_ASHR);
2983   Register Src;
2984   int64_t ShiftAmt;
2985   std::tie(Src, ShiftAmt) = MatchInfo;
2986   unsigned Size = MRI.getType(Src).getScalarSizeInBits();
2987   Builder.setInstrAndDebugLoc(MI);
2988   Builder.buildSExtInReg(MI.getOperand(0).getReg(), Src, Size - ShiftAmt);
2989   MI.eraseFromParent();
2990   return true;
2991 }
2992 
2993 bool CombinerHelper::matchRedundantAnd(MachineInstr &MI,
2994                                        Register &Replacement) {
2995   // Given
2996   //
2997   // %y:_(sN) = G_SOMETHING
2998   // %x:_(sN) = G_SOMETHING
2999   // %res:_(sN) = G_AND %x, %y
3000   //
3001   // Eliminate the G_AND when it is known that x & y == x or x & y == y.
3002   //
3003   // Patterns like this can appear as a result of legalization. E.g.
3004   //
3005   // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
3006   // %one:_(s32) = G_CONSTANT i32 1
3007   // %and:_(s32) = G_AND %cmp, %one
3008   //
3009   // In this case, G_ICMP only produces a single bit, so x & 1 == x.
3010   assert(MI.getOpcode() == TargetOpcode::G_AND);
3011   if (!KB)
3012     return false;
3013 
3014   Register AndDst = MI.getOperand(0).getReg();
3015   LLT DstTy = MRI.getType(AndDst);
3016 
3017   // FIXME: This should be removed once GISelKnownBits supports vectors.
3018   if (DstTy.isVector())
3019     return false;
3020 
3021   Register LHS = MI.getOperand(1).getReg();
3022   Register RHS = MI.getOperand(2).getReg();
3023   KnownBits LHSBits = KB->getKnownBits(LHS);
3024   KnownBits RHSBits = KB->getKnownBits(RHS);
3025 
3026   // Check that x & Mask == x.
3027   // x & 1 == x, always
3028   // x & 0 == x, only if x is also 0
3029   // Meaning Mask has no effect if every bit is either one in Mask or zero in x.
3030   //
3031   // Check if we can replace AndDst with the LHS of the G_AND
3032   if (canReplaceReg(AndDst, LHS, MRI) &&
3033       (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
3034     Replacement = LHS;
3035     return true;
3036   }
3037 
3038   // Check if we can replace AndDst with the RHS of the G_AND
3039   if (canReplaceReg(AndDst, RHS, MRI) &&
3040       (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
3041     Replacement = RHS;
3042     return true;
3043   }
3044 
3045   return false;
3046 }
3047 
3048 bool CombinerHelper::matchRedundantOr(MachineInstr &MI, Register &Replacement) {
3049   // Given
3050   //
3051   // %y:_(sN) = G_SOMETHING
3052   // %x:_(sN) = G_SOMETHING
3053   // %res:_(sN) = G_OR %x, %y
3054   //
3055   // Eliminate the G_OR when it is known that x | y == x or x | y == y.
3056   assert(MI.getOpcode() == TargetOpcode::G_OR);
3057   if (!KB)
3058     return false;
3059 
3060   Register OrDst = MI.getOperand(0).getReg();
3061   LLT DstTy = MRI.getType(OrDst);
3062 
3063   // FIXME: This should be removed once GISelKnownBits supports vectors.
3064   if (DstTy.isVector())
3065     return false;
3066 
3067   Register LHS = MI.getOperand(1).getReg();
3068   Register RHS = MI.getOperand(2).getReg();
3069   KnownBits LHSBits = KB->getKnownBits(LHS);
3070   KnownBits RHSBits = KB->getKnownBits(RHS);
3071 
3072   // Check that x | Mask == x.
3073   // x | 0 == x, always
3074   // x | 1 == x, only if x is also 1
3075   // Meaning Mask has no effect if every bit is either zero in Mask or one in x.
3076   //
3077   // Check if we can replace OrDst with the LHS of the G_OR
3078   if (canReplaceReg(OrDst, LHS, MRI) &&
3079       (LHSBits.One | RHSBits.Zero).isAllOnesValue()) {
3080     Replacement = LHS;
3081     return true;
3082   }
3083 
3084   // Check if we can replace OrDst with the RHS of the G_OR
3085   if (canReplaceReg(OrDst, RHS, MRI) &&
3086       (LHSBits.Zero | RHSBits.One).isAllOnesValue()) {
3087     Replacement = RHS;
3088     return true;
3089   }
3090 
3091   return false;
3092 }
3093 
3094 bool CombinerHelper::matchRedundantSExtInReg(MachineInstr &MI) {
3095   // If the input is already sign extended, just drop the extension.
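  // A G_SEXT_INREG to ExtBits bits is a no-op when the source already has at
  // least TypeSize - ExtBits + 1 sign bits, e.g. an s32 sext_inreg to 16 bits
  // is redundant if the top 17 bits are already copies of the sign bit.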
3096   Register Src = MI.getOperand(1).getReg();
3097   unsigned ExtBits = MI.getOperand(2).getImm();
3098   unsigned TypeSize = MRI.getType(Src).getScalarSizeInBits();
3099   return KB->computeNumSignBits(Src) >= (TypeSize - ExtBits + 1);
3100 }
3101 
3102 static bool isConstValidTrue(const TargetLowering &TLI, unsigned ScalarSizeBits,
3103                              int64_t Cst, bool IsVector, bool IsFP) {
3104   // For i1, Cst will always be -1 regardless of boolean contents.
3105   return (ScalarSizeBits == 1 && Cst == -1) ||
3106          isConstTrueVal(TLI, Cst, IsVector, IsFP);
3107 }
3108 
3109 bool CombinerHelper::matchNotCmp(MachineInstr &MI,
3110                                  SmallVectorImpl<Register> &RegsToNegate) {
3111   assert(MI.getOpcode() == TargetOpcode::G_XOR);
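  // Fold not(cmp-tree): invert every comparison predicate and apply De
  // Morgan's laws to the combining G_AND/G_OR nodes, e.g.
  //
  // %c1:_(s1) = G_ICMP intpred(eq), %a, %b
  // %c2:_(s1) = G_ICMP intpred(ult), %x, %y
  // %or:_(s1) = G_OR %c1, %c2
  // %res:_(s1) = G_XOR %or, %true
  // ==>
  // %c1:_(s1) = G_ICMP intpred(ne), %a, %b
  // %c2:_(s1) = G_ICMP intpred(uge), %x, %y
  // %res:_(s1) = G_AND %c1, %c2
  //
  // where %true is the target's "true" boolean constant.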
3112   LLT Ty = MRI.getType(MI.getOperand(0).getReg());
3113   const auto &TLI = *Builder.getMF().getSubtarget().getTargetLowering();
3114   Register XorSrc;
3115   Register CstReg;
3116   // We match xor(src, true) here.
3117   if (!mi_match(MI.getOperand(0).getReg(), MRI,
3118                 m_GXor(m_Reg(XorSrc), m_Reg(CstReg))))
3119     return false;
3120 
3121   if (!MRI.hasOneNonDBGUse(XorSrc))
3122     return false;
3123 
3124   // Check that XorSrc is the root of a tree of comparisons combined with ANDs
  // and ORs. The suffix of RegsToNegate starting from index I is used as a
  // work list of tree nodes to visit.
3127   RegsToNegate.push_back(XorSrc);
3128   // Remember whether the comparisons are all integer or all floating point.
3129   bool IsInt = false;
3130   bool IsFP = false;
3131   for (unsigned I = 0; I < RegsToNegate.size(); ++I) {
3132     Register Reg = RegsToNegate[I];
3133     if (!MRI.hasOneNonDBGUse(Reg))
3134       return false;
3135     MachineInstr *Def = MRI.getVRegDef(Reg);
3136     switch (Def->getOpcode()) {
3137     default:
3138       // Don't match if the tree contains anything other than ANDs, ORs and
3139       // comparisons.
3140       return false;
3141     case TargetOpcode::G_ICMP:
3142       if (IsFP)
3143         return false;
3144       IsInt = true;
3145       // When we apply the combine we will invert the predicate.
3146       break;
3147     case TargetOpcode::G_FCMP:
3148       if (IsInt)
3149         return false;
3150       IsFP = true;
3151       // When we apply the combine we will invert the predicate.
3152       break;
3153     case TargetOpcode::G_AND:
3154     case TargetOpcode::G_OR:
3155       // Implement De Morgan's laws:
3156       // ~(x & y) -> ~x | ~y
3157       // ~(x | y) -> ~x & ~y
3158       // When we apply the combine we will change the opcode and recursively
3159       // negate the operands.
3160       RegsToNegate.push_back(Def->getOperand(1).getReg());
3161       RegsToNegate.push_back(Def->getOperand(2).getReg());
3162       break;
3163     }
3164   }
3165 
3166   // Now we know whether the comparisons are integer or floating point, check
3167   // the constant in the xor.
3168   int64_t Cst;
3169   if (Ty.isVector()) {
3170     MachineInstr *CstDef = MRI.getVRegDef(CstReg);
3171     auto MaybeCst = getBuildVectorConstantSplat(*CstDef, MRI);
3172     if (!MaybeCst)
3173       return false;
3174     if (!isConstValidTrue(TLI, Ty.getScalarSizeInBits(), *MaybeCst, true, IsFP))
3175       return false;
3176   } else {
3177     if (!mi_match(CstReg, MRI, m_ICst(Cst)))
3178       return false;
3179     if (!isConstValidTrue(TLI, Ty.getSizeInBits(), Cst, false, IsFP))
3180       return false;
3181   }
3182 
3183   return true;
3184 }
3185 
3186 bool CombinerHelper::applyNotCmp(MachineInstr &MI,
3187                                  SmallVectorImpl<Register> &RegsToNegate) {
3188   for (Register Reg : RegsToNegate) {
3189     MachineInstr *Def = MRI.getVRegDef(Reg);
3190     Observer.changingInstr(*Def);
3191     // For each comparison, invert the opcode. For each AND and OR, change the
3192     // opcode.
3193     switch (Def->getOpcode()) {
3194     default:
3195       llvm_unreachable("Unexpected opcode");
3196     case TargetOpcode::G_ICMP:
3197     case TargetOpcode::G_FCMP: {
3198       MachineOperand &PredOp = Def->getOperand(1);
3199       CmpInst::Predicate NewP = CmpInst::getInversePredicate(
3200           (CmpInst::Predicate)PredOp.getPredicate());
3201       PredOp.setPredicate(NewP);
3202       break;
3203     }
3204     case TargetOpcode::G_AND:
3205       Def->setDesc(Builder.getTII().get(TargetOpcode::G_OR));
3206       break;
3207     case TargetOpcode::G_OR:
3208       Def->setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3209       break;
3210     }
3211     Observer.changedInstr(*Def);
3212   }
3213 
3214   replaceRegWith(MRI, MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
3215   MI.eraseFromParent();
3216   return true;
3217 }
3218 
3219 bool CombinerHelper::matchXorOfAndWithSameReg(
3220     MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3221   // Match (xor (and x, y), y) (or any of its commuted cases)
3222   assert(MI.getOpcode() == TargetOpcode::G_XOR);
3223   Register &X = MatchInfo.first;
3224   Register &Y = MatchInfo.second;
3225   Register AndReg = MI.getOperand(1).getReg();
3226   Register SharedReg = MI.getOperand(2).getReg();
3227 
3228   // Find a G_AND on either side of the G_XOR.
3229   // Look for one of
3230   //
3231   // (xor (and x, y), SharedReg)
3232   // (xor SharedReg, (and x, y))
3233   if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y)))) {
3234     std::swap(AndReg, SharedReg);
3235     if (!mi_match(AndReg, MRI, m_GAnd(m_Reg(X), m_Reg(Y))))
3236       return false;
3237   }
3238 
3239   // Only do this if we'll eliminate the G_AND.
3240   if (!MRI.hasOneNonDBGUse(AndReg))
3241     return false;
3242 
3243   // We can combine if SharedReg is the same as either the LHS or RHS of the
3244   // G_AND.
3245   if (Y != SharedReg)
3246     std::swap(X, Y);
3247   return Y == SharedReg;
3248 }
3249 
3250 bool CombinerHelper::applyXorOfAndWithSameReg(
3251     MachineInstr &MI, std::pair<Register, Register> &MatchInfo) {
3252   // Fold (xor (and x, y), y) -> (and (not x), y)
3253   Builder.setInstrAndDebugLoc(MI);
3254   Register X, Y;
3255   std::tie(X, Y) = MatchInfo;
3256   auto Not = Builder.buildNot(MRI.getType(X), X);
3257   Observer.changingInstr(MI);
3258   MI.setDesc(Builder.getTII().get(TargetOpcode::G_AND));
3259   MI.getOperand(1).setReg(Not->getOperand(0).getReg());
3260   MI.getOperand(2).setReg(Y);
3261   Observer.changedInstr(MI);
3262   return true;
3263 }
3264 
3265 bool CombinerHelper::matchPtrAddZero(MachineInstr &MI) {
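  // Fold a G_PTR_ADD with a null base pointer into a G_INTTOPTR of the offset,
  // e.g.
  //
  // %null:_(p0) = G_CONSTANT i64 0
  // %ptr:_(p0) = G_PTR_ADD %null, %offset(s64)
  // ==>
  // %ptr:_(p0) = G_INTTOPTR %offset(s64)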
3266   Register DstReg = MI.getOperand(0).getReg();
3267   LLT Ty = MRI.getType(DstReg);
3268   const DataLayout &DL = Builder.getMF().getDataLayout();
3269 
3270   if (DL.isNonIntegralAddressSpace(Ty.getScalarType().getAddressSpace()))
3271     return false;
3272 
3273   if (Ty.isPointer()) {
3274     auto ConstVal = getConstantVRegVal(MI.getOperand(1).getReg(), MRI);
3275     return ConstVal && *ConstVal == 0;
3276   }
3277 
3278   assert(Ty.isVector() && "Expecting a vector type");
3279   const MachineInstr *VecMI = MRI.getVRegDef(MI.getOperand(1).getReg());
3280   return isBuildVectorAllZeros(*VecMI, MRI);
3281 }
3282 
3283 bool CombinerHelper::applyPtrAddZero(MachineInstr &MI) {
3284   assert(MI.getOpcode() == TargetOpcode::G_PTR_ADD);
3285   Builder.setInstrAndDebugLoc(MI);
3286   Builder.buildIntToPtr(MI.getOperand(0), MI.getOperand(2));
3287   MI.eraseFromParent();
3288   return true;
3289 }
3290 
3291 /// The second source operand is known to be a power of 2.
3292 bool CombinerHelper::applySimplifyURemByPow2(MachineInstr &MI) {
3293   Register DstReg = MI.getOperand(0).getReg();
3294   Register Src0 = MI.getOperand(1).getReg();
3295   Register Pow2Src1 = MI.getOperand(2).getReg();
3296   LLT Ty = MRI.getType(DstReg);
3297   Builder.setInstrAndDebugLoc(MI);
3298 
3299   // Fold (urem x, pow2) -> (and x, pow2-1)
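  // e.g. (urem x, 8) -> (and x, 7)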
3300   auto NegOne = Builder.buildConstant(Ty, -1);
3301   auto Add = Builder.buildAdd(Ty, Pow2Src1, NegOne);
3302   Builder.buildAnd(DstReg, Src0, Add);
3303   MI.eraseFromParent();
3304   return true;
3305 }
3306 
3307 Optional<SmallVector<Register, 8>>
3308 CombinerHelper::findCandidatesForLoadOrCombine(const MachineInstr *Root) const {
3309   assert(Root->getOpcode() == TargetOpcode::G_OR && "Expected G_OR only!");
3310   // We want to detect if Root is part of a tree which represents a bunch
3311   // of loads being merged into a larger load. We'll try to recognize patterns
3312   // like, for example:
3313   //
3314   //  Reg   Reg
3315   //   \    /
3316   //    OR_1   Reg
3317   //     \    /
3318   //      OR_2
3319   //        \     Reg
3320   //         .. /
3321   //        Root
3322   //
3323   //  Reg   Reg   Reg   Reg
3324   //     \ /       \   /
3325   //     OR_1      OR_2
3326   //       \       /
3327   //        \    /
3328   //         ...
3329   //         Root
3330   //
3331   // Each "Reg" may have been produced by a load + some arithmetic. This
3332   // function will save each of them.
3333   SmallVector<Register, 8> RegsToVisit;
3334   SmallVector<const MachineInstr *, 7> Ors = {Root};
3335 
3336   // In the "worst" case, we're dealing with a load for each byte. So, there
3337   // are at most #bytes - 1 ORs.
3338   const unsigned MaxIter =
3339       MRI.getType(Root->getOperand(0).getReg()).getSizeInBytes() - 1;
3340   for (unsigned Iter = 0; Iter < MaxIter; ++Iter) {
3341     if (Ors.empty())
3342       break;
3343     const MachineInstr *Curr = Ors.pop_back_val();
3344     Register OrLHS = Curr->getOperand(1).getReg();
3345     Register OrRHS = Curr->getOperand(2).getReg();
3346 
    // In the combine, we want to eliminate the entire tree.
3348     if (!MRI.hasOneNonDBGUse(OrLHS) || !MRI.hasOneNonDBGUse(OrRHS))
3349       return None;
3350 
3351     // If it's a G_OR, save it and continue to walk. If it's not, then it's
3352     // something that may be a load + arithmetic.
3353     if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrLHS, MRI))
3354       Ors.push_back(Or);
3355     else
3356       RegsToVisit.push_back(OrLHS);
3357     if (const MachineInstr *Or = getOpcodeDef(TargetOpcode::G_OR, OrRHS, MRI))
3358       Ors.push_back(Or);
3359     else
3360       RegsToVisit.push_back(OrRHS);
3361   }
3362 
3363   // We're going to try and merge each register into a wider power-of-2 type,
3364   // so we ought to have an even number of registers.
3365   if (RegsToVisit.empty() || RegsToVisit.size() % 2 != 0)
3366     return None;
3367   return RegsToVisit;
3368 }
3369 
3370 /// Helper function for findLoadOffsetsForLoadOrCombine.
3371 ///
3372 /// Check if \p Reg is the result of loading a \p MemSizeInBits wide value,
3373 /// and then moving that value into a specific byte offset.
3374 ///
3375 /// e.g. x[i] << 24
3376 ///
3377 /// \returns The load instruction and the byte offset it is moved into.
3378 static Optional<std::pair<MachineInstr *, int64_t>>
3379 matchLoadAndBytePosition(Register Reg, unsigned MemSizeInBits,
3380                          const MachineRegisterInfo &MRI) {
3381   assert(MRI.hasOneNonDBGUse(Reg) &&
3382          "Expected Reg to only have one non-debug use?");
3383   Register MaybeLoad;
3384   int64_t Shift;
3385   if (!mi_match(Reg, MRI,
3386                 m_OneNonDBGUse(m_GShl(m_Reg(MaybeLoad), m_ICst(Shift))))) {
3387     Shift = 0;
3388     MaybeLoad = Reg;
3389   }
3390 
3391   if (Shift % MemSizeInBits != 0)
3392     return None;
3393 
3394   // TODO: Handle other types of loads.
3395   auto *Load = getOpcodeDef(TargetOpcode::G_ZEXTLOAD, MaybeLoad, MRI);
3396   if (!Load)
3397     return None;
3398 
3399   const auto &MMO = **Load->memoperands_begin();
3400   if (!MMO.isUnordered() || MMO.getSizeInBits() != MemSizeInBits)
3401     return None;
3402 
3403   return std::make_pair(Load, Shift / MemSizeInBits);
3404 }
3405 
3406 Optional<std::pair<MachineInstr *, int64_t>>
3407 CombinerHelper::findLoadOffsetsForLoadOrCombine(
3408     SmallDenseMap<int64_t, int64_t, 8> &MemOffset2Idx,
3409     const SmallVector<Register, 8> &RegsToVisit, const unsigned MemSizeInBits) {
3410 
3411   // Each load found for the pattern. There should be one for each RegsToVisit.
3412   SmallSetVector<const MachineInstr *, 8> Loads;
3413 
3414   // The lowest index used in any load. (The lowest "i" for each x[i].)
3415   int64_t LowestIdx = INT64_MAX;
3416 
3417   // The load which uses the lowest index.
3418   MachineInstr *LowestIdxLoad = nullptr;
3419 
3420   // Keeps track of the load indices we see. We shouldn't see any indices twice.
3421   SmallSet<int64_t, 8> SeenIdx;
3422 
3423   // Ensure each load is in the same MBB.
3424   // TODO: Support multiple MachineBasicBlocks.
3425   MachineBasicBlock *MBB = nullptr;
3426   const MachineMemOperand *MMO = nullptr;
3427 
3428   // Earliest instruction-order load in the pattern.
3429   MachineInstr *EarliestLoad = nullptr;
3430 
3431   // Latest instruction-order load in the pattern.
3432   MachineInstr *LatestLoad = nullptr;
3433 
3434   // Base pointer which every load should share.
3435   Register BasePtr;
3436 
3437   // We want to find a load for each register. Each load should have some
3438   // appropriate bit twiddling arithmetic. During this loop, we will also keep
3439   // track of the load which uses the lowest index. Later, we will check if we
3440   // can use its pointer in the final, combined load.
3441   for (auto Reg : RegsToVisit) {
    // Find the load, and the position that its value ends up at in the final
    // (possibly shifted) combined value.
3444     auto LoadAndPos = matchLoadAndBytePosition(Reg, MemSizeInBits, MRI);
3445     if (!LoadAndPos)
3446       return None;
3447     MachineInstr *Load;
3448     int64_t DstPos;
3449     std::tie(Load, DstPos) = *LoadAndPos;
3450 
3451     // TODO: Handle multiple MachineBasicBlocks. Currently not handled because
3452     // it is difficult to check for stores/calls/etc between loads.
3453     MachineBasicBlock *LoadMBB = Load->getParent();
3454     if (!MBB)
3455       MBB = LoadMBB;
3456     if (LoadMBB != MBB)
3457       return None;
3458 
3459     // Make sure that the MachineMemOperands of every seen load are compatible.
3460     const MachineMemOperand *LoadMMO = *Load->memoperands_begin();
3461     if (!MMO)
3462       MMO = LoadMMO;
3463     if (MMO->getAddrSpace() != LoadMMO->getAddrSpace())
3464       return None;
3465 
3466     // Find out what the base pointer and index for the load is.
3467     Register LoadPtr;
3468     int64_t Idx;
3469     if (!mi_match(Load->getOperand(1).getReg(), MRI,
3470                   m_GPtrAdd(m_Reg(LoadPtr), m_ICst(Idx)))) {
3471       LoadPtr = Load->getOperand(1).getReg();
3472       Idx = 0;
3473     }
3474 
3475     // Don't combine things like a[i], a[i] -> a bigger load.
3476     if (!SeenIdx.insert(Idx).second)
3477       return None;
3478 
3479     // Every load must share the same base pointer; don't combine things like:
3480     //
3481     // a[i], b[i + 1] -> a bigger load.
3482     if (!BasePtr.isValid())
3483       BasePtr = LoadPtr;
3484     if (BasePtr != LoadPtr)
3485       return None;
3486 
3487     if (Idx < LowestIdx) {
3488       LowestIdx = Idx;
3489       LowestIdxLoad = Load;
3490     }
3491 
3492     // Keep track of the byte offset that this load ends up at. If we have seen
3493     // the byte offset, then stop here. We do not want to combine:
3494     //
3495     // a[i] << 16, a[i + k] << 16 -> a bigger load.
3496     if (!MemOffset2Idx.try_emplace(DstPos, Idx).second)
3497       return None;
3498     Loads.insert(Load);
3499 
3500     // Keep track of the position of the earliest/latest loads in the pattern.
3501     // We will check that there are no load fold barriers between them later
3502     // on.
3503     //
3504     // FIXME: Is there a better way to check for load fold barriers?
3505     if (!EarliestLoad || dominates(*Load, *EarliestLoad))
3506       EarliestLoad = Load;
3507     if (!LatestLoad || dominates(*LatestLoad, *Load))
3508       LatestLoad = Load;
3509   }
3510 
3511   // We found a load for each register. Let's check if each load satisfies the
3512   // pattern.
3513   assert(Loads.size() == RegsToVisit.size() &&
3514          "Expected to find a load for each register?");
  assert(EarliestLoad != LatestLoad && EarliestLoad && LatestLoad &&
         "Expected at least two loads?");
3517 
3518   // Check if there are any stores, calls, etc. between any of the loads. If
3519   // there are, then we can't safely perform the combine.
3520   //
3521   // MaxIter is chosen based off the (worst case) number of iterations it
3522   // typically takes to succeed in the LLVM test suite plus some padding.
3523   //
3524   // FIXME: Is there a better way to check for load fold barriers?
3525   const unsigned MaxIter = 20;
3526   unsigned Iter = 0;
3527   for (const auto &MI : instructionsWithoutDebug(EarliestLoad->getIterator(),
3528                                                  LatestLoad->getIterator())) {
3529     if (Loads.count(&MI))
3530       continue;
3531     if (MI.isLoadFoldBarrier())
3532       return None;
3533     if (Iter++ == MaxIter)
3534       return None;
3535   }
3536 
3537   return std::make_pair(LowestIdxLoad, LowestIdx);
3538 }
3539 
3540 bool CombinerHelper::matchLoadOrCombine(
3541     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3542   assert(MI.getOpcode() == TargetOpcode::G_OR);
3543   MachineFunction &MF = *MI.getMF();
3544   // Assuming a little-endian target, transform:
3545   //  s8 *a = ...
3546   //  s32 val = a[0] | (a[1] << 8) | (a[2] << 16) | (a[3] << 24)
3547   // =>
3548   //  s32 val = *((i32)a)
3549   //
3550   //  s8 *a = ...
3551   //  s32 val = (a[0] << 24) | (a[1] << 16) | (a[2] << 8) | a[3]
3552   // =>
3553   //  s32 val = BSWAP(*((s32)a))
3554   Register Dst = MI.getOperand(0).getReg();
3555   LLT Ty = MRI.getType(Dst);
3556   if (Ty.isVector())
3557     return false;
3558 
3559   // We need to combine at least two loads into this type. Since the smallest
3560   // possible load is into a byte, we need at least a 16-bit wide type.
3561   const unsigned WideMemSizeInBits = Ty.getSizeInBits();
3562   if (WideMemSizeInBits < 16 || WideMemSizeInBits % 8 != 0)
3563     return false;
3564 
3565   // Match a collection of non-OR instructions in the pattern.
3566   auto RegsToVisit = findCandidatesForLoadOrCombine(&MI);
3567   if (!RegsToVisit)
3568     return false;
3569 
3570   // We have a collection of non-OR instructions. Figure out how wide each of
3571   // the small loads should be based off of the number of potential loads we
3572   // found.
3573   const unsigned NarrowMemSizeInBits = WideMemSizeInBits / RegsToVisit->size();
3574   if (NarrowMemSizeInBits % 8 != 0)
3575     return false;
3576 
3577   // Check if each register feeding into each OR is a load from the same
3578   // base pointer + some arithmetic.
3579   //
3580   // e.g. a[0], a[1] << 8, a[2] << 16, etc.
3581   //
3582   // Also verify that each of these ends up putting a[i] into the same memory
3583   // offset as a load into a wide type would.
3584   SmallDenseMap<int64_t, int64_t, 8> MemOffset2Idx;
3585   MachineInstr *LowestIdxLoad;
3586   int64_t LowestIdx;
3587   auto MaybeLoadInfo = findLoadOffsetsForLoadOrCombine(
3588       MemOffset2Idx, *RegsToVisit, NarrowMemSizeInBits);
3589   if (!MaybeLoadInfo)
3590     return false;
3591   std::tie(LowestIdxLoad, LowestIdx) = *MaybeLoadInfo;
3592 
3593   // We have a bunch of loads being OR'd together. Using the addresses + offsets
3594   // we found before, check if this corresponds to a big or little endian byte
3595   // pattern. If it does, then we can represent it using a load + possibly a
3596   // BSWAP.
3597   bool IsBigEndianTarget = MF.getDataLayout().isBigEndian();
3598   Optional<bool> IsBigEndian = isBigEndian(MemOffset2Idx, LowestIdx);
3599   if (!IsBigEndian.hasValue())
3600     return false;
3601   bool NeedsBSwap = IsBigEndianTarget != *IsBigEndian;
3602   if (NeedsBSwap && !isLegalOrBeforeLegalizer({TargetOpcode::G_BSWAP, {Ty}}))
3603     return false;
3604 
3605   // Make sure that the load from the lowest index produces offset 0 in the
3606   // final value.
3607   //
3608   // This ensures that we won't combine something like this:
3609   //
3610   // load x[i] -> byte 2
3611   // load x[i+1] -> byte 0 ---> wide_load x[i]
3612   // load x[i+2] -> byte 1
3613   const unsigned NumLoadsInTy = WideMemSizeInBits / NarrowMemSizeInBits;
3614   const unsigned ZeroByteOffset =
3615       *IsBigEndian
3616           ? bigEndianByteAt(NumLoadsInTy, 0)
3617           : littleEndianByteAt(NumLoadsInTy, 0);
3618   auto ZeroOffsetIdx = MemOffset2Idx.find(ZeroByteOffset);
3619   if (ZeroOffsetIdx == MemOffset2Idx.end() ||
3620       ZeroOffsetIdx->second != LowestIdx)
3621     return false;
3622 
  // We will reuse the pointer from the load which ends up at byte offset 0.
  // It may not use index 0.
3625   Register Ptr = LowestIdxLoad->getOperand(1).getReg();
3626   const MachineMemOperand &MMO = **LowestIdxLoad->memoperands_begin();
3627   LegalityQuery::MemDesc MMDesc;
3628   MMDesc.SizeInBits = WideMemSizeInBits;
3629   MMDesc.AlignInBits = MMO.getAlign().value() * 8;
3630   MMDesc.Ordering = MMO.getOrdering();
3631   if (!isLegalOrBeforeLegalizer(
3632           {TargetOpcode::G_LOAD, {Ty, MRI.getType(Ptr)}, {MMDesc}}))
3633     return false;
3634   auto PtrInfo = MMO.getPointerInfo();
3635   auto *NewMMO = MF.getMachineMemOperand(&MMO, PtrInfo, WideMemSizeInBits / 8);
3636 
3637   // Load must be allowed and fast on the target.
3638   LLVMContext &C = MF.getFunction().getContext();
3639   auto &DL = MF.getDataLayout();
3640   bool Fast = false;
3641   if (!getTargetLowering().allowsMemoryAccess(C, DL, Ty, *NewMMO, &Fast) ||
3642       !Fast)
3643     return false;
3644 
3645   MatchInfo = [=](MachineIRBuilder &MIB) {
3646     Register LoadDst = NeedsBSwap ? MRI.cloneVirtualRegister(Dst) : Dst;
3647     MIB.buildLoad(LoadDst, Ptr, *NewMMO);
3648     if (NeedsBSwap)
3649       MIB.buildBSwap(Dst, LoadDst);
3650   };
3651   return true;
3652 }
3653 
3654 bool CombinerHelper::matchExtendThroughPhis(MachineInstr &MI,
3655                                             MachineInstr *&ExtMI) {
3656   assert(MI.getOpcode() == TargetOpcode::G_PHI);
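  // If the only user of this G_PHI is an extend, push the extend into the
  // defining blocks of the incoming values and make the phi produce the wide
  // type instead, e.g.
  //
  // bb.1: %a:_(s8) = G_LOAD ...
  // bb.2: %b:_(s8) = G_TRUNC ...
  // bb.3: %phi:_(s8) = G_PHI %a(s8), %bb.1, %b(s8), %bb.2
  //       %ext:_(s32) = G_SEXT %phi(s8)
  // ==>
  // bb.1: %a:_(s8) = G_LOAD ...
  //       %a_ext:_(s32) = G_SEXT %a(s8)
  // bb.2: %b:_(s8) = G_TRUNC ...
  //       %b_ext:_(s32) = G_SEXT %b(s8)
  // bb.3: %ext:_(s32) = G_PHI %a_ext(s32), %bb.1, %b_ext(s32), %bb.2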
3657 
3658   Register DstReg = MI.getOperand(0).getReg();
3659 
3660   // TODO: Extending a vector may be expensive, don't do this until heuristics
3661   // are better.
3662   if (MRI.getType(DstReg).isVector())
3663     return false;
3664 
3665   // Try to match a phi, whose only use is an extend.
3666   if (!MRI.hasOneNonDBGUse(DstReg))
3667     return false;
3668   ExtMI = &*MRI.use_instr_nodbg_begin(DstReg);
3669   switch (ExtMI->getOpcode()) {
3670   case TargetOpcode::G_ANYEXT:
3671     return true; // G_ANYEXT is usually free.
3672   case TargetOpcode::G_ZEXT:
3673   case TargetOpcode::G_SEXT:
3674     break;
3675   default:
3676     return false;
3677   }
3678 
3679   // If the target is likely to fold this extend away, don't propagate.
3680   if (Builder.getTII().isExtendLikelyToBeFolded(*ExtMI, MRI))
3681     return false;
3682 
3683   // We don't want to propagate the extends unless there's a good chance that
3684   // they'll be optimized in some way.
3685   // Collect the unique incoming values.
3686   SmallPtrSet<MachineInstr *, 4> InSrcs;
3687   for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
3688     auto *DefMI = getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI);
3689     switch (DefMI->getOpcode()) {
3690     case TargetOpcode::G_LOAD:
3691     case TargetOpcode::G_TRUNC:
3692     case TargetOpcode::G_SEXT:
3693     case TargetOpcode::G_ZEXT:
3694     case TargetOpcode::G_ANYEXT:
3695     case TargetOpcode::G_CONSTANT:
3696       InSrcs.insert(getDefIgnoringCopies(MI.getOperand(Idx).getReg(), MRI));
3697       // Don't try to propagate if there are too many places to create new
3698       // extends, chances are it'll increase code size.
3699       if (InSrcs.size() > 2)
3700         return false;
3701       break;
3702     default:
3703       return false;
3704     }
3705   }
3706   return true;
3707 }
3708 
3709 bool CombinerHelper::applyExtendThroughPhis(MachineInstr &MI,
3710                                             MachineInstr *&ExtMI) {
3711   assert(MI.getOpcode() == TargetOpcode::G_PHI);
3712   Register DstReg = ExtMI->getOperand(0).getReg();
3713   LLT ExtTy = MRI.getType(DstReg);
3714 
  // Propagate the extension into each incoming value's defining block.
3716   // Use a SetVector here because PHIs can have duplicate edges, and we want
3717   // deterministic iteration order.
3718   SmallSetVector<MachineInstr *, 8> SrcMIs;
3719   SmallDenseMap<MachineInstr *, MachineInstr *, 8> OldToNewSrcMap;
3720   for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); SrcIdx += 2) {
3721     auto *SrcMI = MRI.getVRegDef(MI.getOperand(SrcIdx).getReg());
3722     if (!SrcMIs.insert(SrcMI))
3723       continue;
3724 
3725     // Build an extend after each src inst.
3726     auto *MBB = SrcMI->getParent();
3727     MachineBasicBlock::iterator InsertPt = ++SrcMI->getIterator();
3728     if (InsertPt != MBB->end() && InsertPt->isPHI())
3729       InsertPt = MBB->getFirstNonPHI();
3730 
3731     Builder.setInsertPt(*SrcMI->getParent(), InsertPt);
3732     Builder.setDebugLoc(MI.getDebugLoc());
3733     auto NewExt = Builder.buildExtOrTrunc(ExtMI->getOpcode(), ExtTy,
3734                                           SrcMI->getOperand(0).getReg());
3735     OldToNewSrcMap[SrcMI] = NewExt;
3736   }
3737 
3738   // Create a new phi with the extended inputs.
3739   Builder.setInstrAndDebugLoc(MI);
3740   auto NewPhi = Builder.buildInstrNoInsert(TargetOpcode::G_PHI);
3741   NewPhi.addDef(DstReg);
3742   for (unsigned SrcIdx = 1; SrcIdx < MI.getNumOperands(); ++SrcIdx) {
3743     auto &MO = MI.getOperand(SrcIdx);
3744     if (!MO.isReg()) {
3745       NewPhi.addMBB(MO.getMBB());
3746       continue;
3747     }
3748     auto *NewSrc = OldToNewSrcMap[MRI.getVRegDef(MO.getReg())];
3749     NewPhi.addUse(NewSrc->getOperand(0).getReg());
3750   }
3751   Builder.insertInstr(NewPhi);
3752   ExtMI->eraseFromParent();
3753   return true;
3754 }
3755 
3756 bool CombinerHelper::matchExtractVecEltBuildVec(MachineInstr &MI,
3757                                                 Register &Reg) {
3758   assert(MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT);
3759   // If we have a constant index, look for a G_BUILD_VECTOR source
3760   // and find the source register that the index maps to.
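  //
  // e.g.
  //
  // %vec:_(<2 x s32>) = G_BUILD_VECTOR %a(s32), %b(s32)
  // %el:_(s32) = G_EXTRACT_VECTOR_ELT %vec, 1
  // ==>
  // use %b in place of %el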
3761   Register SrcVec = MI.getOperand(1).getReg();
3762   LLT SrcTy = MRI.getType(SrcVec);
3763   if (!isLegalOrBeforeLegalizer(
3764           {TargetOpcode::G_BUILD_VECTOR, {SrcTy, SrcTy.getElementType()}}))
3765     return false;
3766 
3767   auto Cst = getConstantVRegValWithLookThrough(MI.getOperand(2).getReg(), MRI);
3768   if (!Cst || Cst->Value.getZExtValue() >= SrcTy.getNumElements())
3769     return false;
3770 
3771   unsigned VecIdx = Cst->Value.getZExtValue();
3772   MachineInstr *BuildVecMI =
3773       getOpcodeDef(TargetOpcode::G_BUILD_VECTOR, SrcVec, MRI);
3774   if (!BuildVecMI) {
3775     BuildVecMI = getOpcodeDef(TargetOpcode::G_BUILD_VECTOR_TRUNC, SrcVec, MRI);
3776     if (!BuildVecMI)
3777       return false;
3778     LLT ScalarTy = MRI.getType(BuildVecMI->getOperand(1).getReg());
3779     if (!isLegalOrBeforeLegalizer(
3780             {TargetOpcode::G_BUILD_VECTOR_TRUNC, {SrcTy, ScalarTy}}))
3781       return false;
3782   }
3783 
3784   EVT Ty(getMVTForLLT(SrcTy));
3785   if (!MRI.hasOneNonDBGUse(SrcVec) &&
3786       !getTargetLowering().aggressivelyPreferBuildVectorSources(Ty))
3787     return false;
3788 
3789   Reg = BuildVecMI->getOperand(VecIdx + 1).getReg();
3790   return true;
3791 }
3792 
3793 void CombinerHelper::applyExtractVecEltBuildVec(MachineInstr &MI,
3794                                                 Register &Reg) {
3795   // Check the type of the register, since it may have come from a
3796   // G_BUILD_VECTOR_TRUNC.
3797   LLT ScalarTy = MRI.getType(Reg);
3798   Register DstReg = MI.getOperand(0).getReg();
3799   LLT DstTy = MRI.getType(DstReg);
3800 
3801   Builder.setInstrAndDebugLoc(MI);
3802   if (ScalarTy != DstTy) {
3803     assert(ScalarTy.getSizeInBits() > DstTy.getSizeInBits());
3804     Builder.buildTrunc(DstReg, Reg);
3805     MI.eraseFromParent();
3806     return;
3807   }
3808   replaceSingleDefInstWithReg(MI, Reg);
3809 }
3810 
3811 bool CombinerHelper::matchExtractAllEltsFromBuildVector(
3812     MachineInstr &MI,
3813     SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
3814   assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3815   // This combine tries to find build_vector's which have every source element
  // extracted using G_EXTRACT_VECTOR_ELT. This can happen when transforms like
  // masked load scalarization are run late in the pipeline. There's already
3818   // a combine for a similar pattern starting from the extract, but that
3819   // doesn't attempt to do it if there are multiple uses of the build_vector,
3820   // which in this case is true. Starting the combine from the build_vector
3821   // feels more natural than trying to find sibling nodes of extracts.
3822   // E.g.
3823   //  %vec(<4 x s32>) = G_BUILD_VECTOR %s1(s32), %s2, %s3, %s4
3824   //  %ext1 = G_EXTRACT_VECTOR_ELT %vec, 0
3825   //  %ext2 = G_EXTRACT_VECTOR_ELT %vec, 1
3826   //  %ext3 = G_EXTRACT_VECTOR_ELT %vec, 2
3827   //  %ext4 = G_EXTRACT_VECTOR_ELT %vec, 3
3828   // ==>
3829   // replace ext{1,2,3,4} with %s{1,2,3,4}
3830 
3831   Register DstReg = MI.getOperand(0).getReg();
3832   LLT DstTy = MRI.getType(DstReg);
3833   unsigned NumElts = DstTy.getNumElements();
3834 
3835   SmallBitVector ExtractedElts(NumElts);
3836   for (auto &II : make_range(MRI.use_instr_nodbg_begin(DstReg),
3837                              MRI.use_instr_nodbg_end())) {
3838     if (II.getOpcode() != TargetOpcode::G_EXTRACT_VECTOR_ELT)
3839       return false;
3840     auto Cst = getConstantVRegVal(II.getOperand(2).getReg(), MRI);
3841     if (!Cst)
3842       return false;
3843     unsigned Idx = Cst.getValue().getZExtValue();
3844     if (Idx >= NumElts)
3845       return false; // Out of range.
3846     ExtractedElts.set(Idx);
3847     SrcDstPairs.emplace_back(
3848         std::make_pair(MI.getOperand(Idx + 1).getReg(), &II));
3849   }
3850   // Match if every element was extracted.
3851   return ExtractedElts.all();
3852 }
3853 
3854 void CombinerHelper::applyExtractAllEltsFromBuildVector(
3855     MachineInstr &MI,
3856     SmallVectorImpl<std::pair<Register, MachineInstr *>> &SrcDstPairs) {
3857   assert(MI.getOpcode() == TargetOpcode::G_BUILD_VECTOR);
3858   for (auto &Pair : SrcDstPairs) {
3859     auto *ExtMI = Pair.second;
3860     replaceRegWith(MRI, ExtMI->getOperand(0).getReg(), Pair.first);
3861     ExtMI->eraseFromParent();
3862   }
3863   MI.eraseFromParent();
3864 }
3865 
3866 bool CombinerHelper::applyBuildFn(
3867     MachineInstr &MI, std::function<void(MachineIRBuilder &)> &MatchInfo) {
3868   Builder.setInstrAndDebugLoc(MI);
3869   MatchInfo(Builder);
3870   MI.eraseFromParent();
3871   return true;
3872 }
3873 
3874 /// Match an FSHL or FSHR that can be combined to a ROTR or ROTL rotate.
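/// e.g. (G_FSHL x, x, amt) -> (G_ROTL x, amt),
///      (G_FSHR x, x, amt) -> (G_ROTR x, amt).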
3875 bool CombinerHelper::matchFunnelShiftToRotate(MachineInstr &MI) {
3876   unsigned Opc = MI.getOpcode();
3877   assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
3878   Register X = MI.getOperand(1).getReg();
3879   Register Y = MI.getOperand(2).getReg();
3880   if (X != Y)
3881     return false;
3882   unsigned RotateOpc =
3883       Opc == TargetOpcode::G_FSHL ? TargetOpcode::G_ROTL : TargetOpcode::G_ROTR;
  return isLegalOrBeforeLegalizer(
      {RotateOpc, {MRI.getType(X), MRI.getType(Y)}});
3885 }
3886 
3887 void CombinerHelper::applyFunnelShiftToRotate(MachineInstr &MI) {
3888   unsigned Opc = MI.getOpcode();
3889   assert(Opc == TargetOpcode::G_FSHL || Opc == TargetOpcode::G_FSHR);
3890   bool IsFSHL = Opc == TargetOpcode::G_FSHL;
3891   Observer.changingInstr(MI);
3892   MI.setDesc(Builder.getTII().get(IsFSHL ? TargetOpcode::G_ROTL
3893                                          : TargetOpcode::G_ROTR));
3894   MI.RemoveOperand(2);
3895   Observer.changedInstr(MI);
3896 }
3897 
3898 // Fold (rot x, c) -> (rot x, c % BitSize)
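// e.g. (rotl x:_(s32), 40) -> (rotl x, 8).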
3899 bool CombinerHelper::matchRotateOutOfRange(MachineInstr &MI) {
3900   assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
3901          MI.getOpcode() == TargetOpcode::G_ROTR);
3902   unsigned Bitsize =
3903       MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
3904   Register AmtReg = MI.getOperand(2).getReg();
3905   bool OutOfRange = false;
3906   auto MatchOutOfRange = [Bitsize, &OutOfRange](const Constant *C) {
3907     if (auto *CI = dyn_cast<ConstantInt>(C))
3908       OutOfRange |= CI->getValue().uge(Bitsize);
3909     return true;
3910   };
3911   return matchUnaryPredicate(MRI, AmtReg, MatchOutOfRange) && OutOfRange;
3912 }
3913 
3914 void CombinerHelper::applyRotateOutOfRange(MachineInstr &MI) {
3915   assert(MI.getOpcode() == TargetOpcode::G_ROTL ||
3916          MI.getOpcode() == TargetOpcode::G_ROTR);
3917   unsigned Bitsize =
3918       MRI.getType(MI.getOperand(0).getReg()).getScalarSizeInBits();
3919   Builder.setInstrAndDebugLoc(MI);
3920   Register Amt = MI.getOperand(2).getReg();
3921   LLT AmtTy = MRI.getType(Amt);
3922   auto Bits = Builder.buildConstant(AmtTy, Bitsize);
3923   Amt = Builder.buildURem(AmtTy, MI.getOperand(2).getReg(), Bits).getReg(0);
3924   Observer.changingInstr(MI);
3925   MI.getOperand(2).setReg(Amt);
3926   Observer.changedInstr(MI);
3927 }
3928 
3929 bool CombinerHelper::tryCombine(MachineInstr &MI) {
3930   if (tryCombineCopy(MI))
3931     return true;
3932   if (tryCombineExtendingLoads(MI))
3933     return true;
3934   if (tryCombineIndexedLoadStore(MI))
3935     return true;
3936   return false;
3937 }
3938